From 3a84aef64a1992c75b5638c9475397b6cf24a186 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 5 Nov 2025 10:24:16 -0500 Subject: [PATCH 01/61] [PowerPC][NFC] auto gen checks vec rounding tests (#166435) Update tests to contain auto generated checks. --- llvm/test/CodeGen/PowerPC/vec_rounding.ll | 195 +++++++++++++++------- 1 file changed, 137 insertions(+), 58 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/vec_rounding.ll b/llvm/test/CodeGen/PowerPC/vec_rounding.ll index 2f16a435440ff..438c8ebdc099e 100644 --- a/llvm/test/CodeGen/PowerPC/vec_rounding.ll +++ b/llvm/test/CodeGen/PowerPC/vec_rounding.ll @@ -1,172 +1,251 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s ; Check vector round to single-precision toward -infinity (vrfim) ; instruction generation using Altivec. 
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) define <2 x double> @floor_v2f64(<2 x double> %p) +; CHECK-LABEL: floor_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frim 1, 1 +; CHECK-NEXT: frim 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: floor_v2f64: -; CHECK: frim -; CHECK: frim declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) define <4 x double> @floor_v4f64(<4 x double> %p) +; CHECK-LABEL: floor_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frim 1, 1 +; CHECK-NEXT: frim 2, 2 +; CHECK-NEXT: frim 3, 3 +; CHECK-NEXT: frim 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: floor_v4f64: -; CHECK: frim -; CHECK: frim -; CHECK: frim -; CHECK: frim declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) define <2 x double> @ceil_v2f64(<2 x double> %p) +; CHECK-LABEL: ceil_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frip 1, 1 +; CHECK-NEXT: frip 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: ceil_v2f64: -; CHECK: frip -; CHECK: frip declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) define <4 x double> @ceil_v4f64(<4 x double> %p) +; CHECK-LABEL: ceil_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frip 1, 1 +; CHECK-NEXT: frip 2, 2 +; CHECK-NEXT: frip 3, 3 +; CHECK-NEXT: frip 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: ceil_v4f64: -; CHECK: frip -; CHECK: frip -; CHECK: frip -; CHECK: frip declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) define <2 x double> @trunc_v2f64(<2 x double> %p) +; CHECK-LABEL: trunc_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: friz 1, 1 +; CHECK-NEXT: friz 2, 2 +; CHECK-NEXT: blr { 
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: trunc_v2f64: -; CHECK: friz -; CHECK: friz declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) define <4 x double> @trunc_v4f64(<4 x double> %p) +; CHECK-LABEL: trunc_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: friz 1, 1 +; CHECK-NEXT: friz 2, 2 +; CHECK-NEXT: friz 3, 3 +; CHECK-NEXT: friz 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: trunc_v4f64: -; CHECK: friz -; CHECK: friz -; CHECK: friz -; CHECK: friz declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) -define <2 x double> @nearbyint_v2f64(<2 x double> %p) +define <2 x double> @nearbyint_v2f64(<2 x double> %p) nounwind +; CHECK-LABEL: nearbyint_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -128(1) +; CHECK-NEXT: std 0, 144(1) +; CHECK-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 31, 2 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 30, 1 +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 2, 1 +; CHECK-NEXT: fmr 1, 30 +; CHECK-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 128 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: nearbyint_v2f64: -; CHECK: bl nearbyint -; CHECK: bl nearbyint declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) -define <4 x double> @nearbyint_v4f64(<4 x double> %p) +define <4 x double> @nearbyint_v4f64(<4 x double> %p) nounwind +; CHECK-LABEL: nearbyint_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -144(1) +; CHECK-NEXT: std 0, 160(1) +; CHECK-NEXT: stfd 28, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 120(1) # 8-byte 
Folded Spill +; CHECK-NEXT: fmr 29, 2 +; CHECK-NEXT: stfd 30, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 30, 3 +; CHECK-NEXT: stfd 31, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 31, 4 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 28, 1 +; CHECK-NEXT: fmr 1, 29 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 29, 1 +; CHECK-NEXT: fmr 1, 30 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 30, 1 +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 4, 1 +; CHECK-NEXT: fmr 1, 28 +; CHECK-NEXT: lfd 31, 136(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: fmr 2, 29 +; CHECK-NEXT: fmr 3, 30 +; CHECK-NEXT: lfd 30, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 144 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: nearbyint_v4f64: -; CHECK: bl nearbyint -; CHECK: bl nearbyint -; CHECK: bl nearbyint -; CHECK: bl nearbyint declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) define <4 x float> @floor_v4f32(<4 x float> %p) +; CHECK-LABEL: floor_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfim 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: floor_v4f32: -; CHECK: vrfim declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) define <8 x float> @floor_v8f32(<8 x float> %p) +; CHECK-LABEL: floor_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfim 2, 2 +; CHECK-NEXT: vrfim 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: floor_v8f32: -; CHECK: vrfim -; CHECK: vrfim declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) define <4 x float> @ceil_v4f32(<4 x float> %p) +; CHECK-LABEL: ceil_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfip 
2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: ceil_v4f32: -; CHECK: vrfip declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) define <8 x float> @ceil_v8f32(<8 x float> %p) +; CHECK-LABEL: ceil_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfip 2, 2 +; CHECK-NEXT: vrfip 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: ceil_v8f32: -; CHECK: vrfip -; CHECK: vrfip declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) define <4 x float> @trunc_v4f32(<4 x float> %p) +; CHECK-LABEL: trunc_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfiz 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: trunc_v4f32: -; CHECK: vrfiz declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) define <8 x float> @trunc_v8f32(<8 x float> %p) +; CHECK-LABEL: trunc_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfiz 2, 2 +; CHECK-NEXT: vrfiz 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: trunc_v8f32: -; CHECK: vrfiz -; CHECK: vrfiz declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) define <4 x float> @nearbyint_v4f32(<4 x float> %p) +; CHECK-LABEL: nearbyint_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfin 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: nearbyint_v4f32: -; CHECK: vrfin declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) define <8 x float> @nearbyint_v8f32(<8 x float> %p) +; CHECK-LABEL: nearbyint_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfin 2, 2 +; CHECK-NEXT: vrfin 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: nearbyint_v8f32: -; CHECK: vrfin -; CHECK: vrfin From 9762ab0c3d25b311826f57f0c5b4d601dcede8bc Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: 
Wed, 5 Nov 2025 20:57:26 +0530 Subject: [PATCH 02/61] [MLIR][NVVM] Fix the lowering of mbarrier.test.wait (#166555) PR #165993 accidentally broke the lowering of the `test.wait` Op. This patch fixes the issue and adds tests to verify the lowering to intrinsics for all mbarrier Ops, ensuring similar regressions are caught in the future. Additionally, the `cp-async-mbarrier` test is moved to the `mbarriers.mlir` test file to keep all related tests together. Signed-off-by: Durgadoss R --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 2 +- mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir | 116 ++++++++++++++++++++ mlir/test/Target/LLVMIR/nvvmir.mlir | 13 --- 3 files changed, 117 insertions(+), 14 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 10f0cc254ea97..80bc0e5986e51 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -949,7 +949,7 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, }]; string llvmBuilder = [{ - auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs( + auto [id, args] = NVVM::MBarrierTestWaitOp::getIntrinsicIDAndArgs( *op, moduleTranslation, builder); $res = createIntrinsicCall(builder, id, args); }]; diff --git a/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir new file mode 100644 index 0000000000000..9bb3b082777fd --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir @@ -0,0 +1,116 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { + // CHECK-LABEL: define void @cp_async_mbarrier_arrive(ptr addrspace(3) %0, ptr %1) { + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %1) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %1) + // CHECK-NEXT: 
call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %0) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> + nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> + llvm.return +} + +llvm.func @mbarrier_init_generic(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_init_generic(ptr %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init(ptr %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + nvvm.mbarrier.init %barrier, %count : !llvm.ptr, i32 + llvm.return +} + +llvm.func @mbarrier_init_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_init_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + nvvm.mbarrier.init %barrier, %count : !llvm.ptr<3>, i32 + llvm.return +} + +llvm.func @mbarrier_inval_generic(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_inval_generic(ptr %0) { + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval(ptr %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.mbarrier.inval %barrier : !llvm.ptr + llvm.return +} + +llvm.func @mbarrier_inval_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_inval_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.mbarrier.inval %barrier : !llvm.ptr<3> + llvm.return +} + +llvm.func 
@mbarrier_arrive(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_arrive(ptr %0) { + // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive(ptr %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_arrive_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_nocomplete(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_arrive_nocomplete(ptr %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr, i32 -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_nocomplete_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_arrive_nocomplete_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr<3>, i32 -> i64 + llvm.return +} + +llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr, %token : i64) -> i1 { + // CHECK-LABEL: define i1 @mbarrier_test_wait(ptr %0, i64 %1) { + // CHECK-NEXT: %3 = call i1 @llvm.nvvm.mbarrier.test.wait(ptr %0, i64 %1) + // CHECK-NEXT: ret i1 %3 + // CHECK-NEXT: } + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr, i64 -> i1 + llvm.return %isComplete : i1 +} + 
+llvm.func @mbarrier_test_wait_shared(%barrier: !llvm.ptr<3>, %token : i64) { + // CHECK-LABEL: define void @mbarrier_test_wait_shared(ptr addrspace(3) %0, i64 %1) { + // CHECK-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %0, i64 %1) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr<3>, i64 -> i1 + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 3fc09f371a347..1ec55408e97a5 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -531,19 +531,6 @@ llvm.func @async_cp_zfill(%dst: !llvm.ptr<3>, %src: !llvm.ptr<1>, %cpSize: i32) llvm.return } -// CHECK-LABEL: @cp_async_mbarrier_arrive -llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> - llvm.return -} - // CHECK-LABEL: @llvm_nvvm_setmaxregister llvm.func @llvm_nvvm_setmaxregister() { // CHECK: call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 256) From 338fb02c9878761a97ed0e7e44c19d6ed8463434 Mon Sep 17 00:00:00 2001 From: Elvina Yakubova Date: Wed, 5 Nov 2025 15:28:31 +0000 Subject: [PATCH 03/61] =?UTF-8?q?[BOLT][NFC]=20Rename=20funtions=20with=20?= 
=?UTF-8?q?=5Fnegative=20suffix=20to=20=5Funknown=20when=20th=E2=80=A6=20(?= =?UTF-8?q?#166536)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …e size is unknown Keep _negative suffix only for test cases when the size is negative --- bolt/test/runtime/AArch64/inline-memcpy.s | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/bolt/test/runtime/AArch64/inline-memcpy.s b/bolt/test/runtime/AArch64/inline-memcpy.s index badff299603a0..75066c855b9ed 100644 --- a/bolt/test/runtime/AArch64/inline-memcpy.s +++ b/bolt/test/runtime/AArch64/inline-memcpy.s @@ -81,14 +81,14 @@ # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}} Date: Wed, 5 Nov 2025 16:35:25 +0100 Subject: [PATCH 04/61] [flang] Adding NOTIFY specifier in image selector and add notify type checks (#148810) This PR adds support for the NOTIFY specifier in the image selector as described in the 2023 standard, and adds checks for the NOTIFY_TYPE type. 
--- flang/examples/FeatureList/FeatureList.cpp | 1 + flang/include/flang/Evaluate/traverse.h | 2 +- flang/include/flang/Evaluate/variable.h | 4 +++ flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 4 ++- flang/include/flang/Semantics/tools.h | 3 ++ flang/lib/Evaluate/variable.cpp | 13 +++++++ flang/lib/Lower/Support/Utils.cpp | 5 +-- flang/lib/Parser/Fortran-parsers.cpp | 7 ++-- flang/lib/Parser/unparse.cpp | 1 + flang/lib/Semantics/check-declarations.cpp | 13 +++++++ flang/lib/Semantics/dump-expr.cpp | 1 + flang/lib/Semantics/expression.cpp | 13 +++++++ flang/lib/Semantics/tools.cpp | 38 ++++++++++++++++++++ flang/test/Semantics/coarrays02.f90 | 17 +++++++++ flang/test/Semantics/notifywait03.f90 | 1 + 16 files changed, 118 insertions(+), 6 deletions(-) diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index ef58da61e371b..bb55a8163d938 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -348,6 +348,7 @@ struct NodeVisitor { READ_FEATURE(TeamValue) READ_FEATURE(ImageSelector) READ_FEATURE(ImageSelectorSpec) + READ_FEATURE(ImageSelectorSpec::Notify) READ_FEATURE(ImageSelectorSpec::Stat) READ_FEATURE(ImageSelectorSpec::Team_Number) READ_FEATURE(ImplicitPart) diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h index 48aafa8982559..d63c16f93230a 100644 --- a/flang/include/flang/Evaluate/traverse.h +++ b/flang/include/flang/Evaluate/traverse.h @@ -146,7 +146,7 @@ class Traverse { return Combine(x.base(), x.subscript()); } Result operator()(const CoarrayRef &x) const { - return Combine(x.base(), x.cosubscript(), x.stat(), x.team()); + return Combine(x.base(), x.cosubscript(), x.notify(), x.stat(), x.team()); } Result operator()(const DataRef &x) const { return visitor_(x.u); } Result operator()(const Substring &x) const { diff --git a/flang/include/flang/Evaluate/variable.h 
b/flang/include/flang/Evaluate/variable.h index 5c14421fd3a1b..4f64ede3d407d 100644 --- a/flang/include/flang/Evaluate/variable.h +++ b/flang/include/flang/Evaluate/variable.h @@ -260,6 +260,9 @@ class CoarrayRef { // it's TEAM=. std::optional> team() const; CoarrayRef &set_team(Expr &&); + // When notify() is Expr, it's NOTIFY=. + std::optional> notify() const; + CoarrayRef &set_notify(Expr &&); int Rank() const; int Corank() const { return 0; } @@ -272,6 +275,7 @@ class CoarrayRef { private: common::CopyableIndirection base_; std::vector> cosubscript_; + std::optional>> notify_; std::optional>> stat_; std::optional>> team_; }; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index de2716410d6cd..b2424023b0168 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -387,6 +387,7 @@ class ParseTreeDumper { NODE(parser, TeamValue) NODE(parser, ImageSelector) NODE(parser, ImageSelectorSpec) + NODE(ImageSelectorSpec, Notify) NODE(ImageSelectorSpec, Stat) NODE(ImageSelectorSpec, Team_Number) NODE(parser, ImplicitPart) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 8c7578f7a1941..32e444fbb2e6c 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -1684,13 +1684,15 @@ using Cosubscript = ScalarIntExpr; WRAPPER_CLASS(TeamValue, Scalar>); // R926 image-selector-spec -> +// NOTIFY = notify-variable | // STAT = stat-variable | TEAM = team-value | // TEAM_NUMBER = scalar-int-expr struct ImageSelectorSpec { WRAPPER_CLASS(Stat, Scalar>>); WRAPPER_CLASS(Team_Number, ScalarIntExpr); + WRAPPER_CLASS(Notify, Scalar>); UNION_CLASS_BOILERPLATE(ImageSelectorSpec); - std::variant u; + std::variant u; }; // R924 image-selector -> diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 8a7b9867c0979..1c3477013b559 100644 --- 
a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -107,6 +107,7 @@ bool IsBindCProcedure(const Scope &); // Returns a pointer to the function's symbol when true, else null const Symbol *IsFunctionResultWithSameNameAsFunction(const Symbol &); bool IsOrContainsEventOrLockComponent(const Symbol &); +bool IsOrContainsNotifyComponent(const Symbol &); bool CanBeTypeBoundProc(const Symbol &); // Does a non-PARAMETER symbol have explicit initialization with =value or // =>target in its declaration (but not in a DATA statement)? (Being @@ -652,6 +653,8 @@ using PotentialAndPointerComponentIterator = // dereferenced. PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( const DerivedTypeSpec &, bool ignoreCoarrays = false); +PotentialComponentIterator::const_iterator FindNotifyPotentialComponent( + const DerivedTypeSpec &, bool ignoreCoarrays = false); PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( const DerivedTypeSpec &); PotentialAndPointerComponentIterator::const_iterator diff --git a/flang/lib/Evaluate/variable.cpp b/flang/lib/Evaluate/variable.cpp index b9b34d4d5bc89..b257dad42fc58 100644 --- a/flang/lib/Evaluate/variable.cpp +++ b/flang/lib/Evaluate/variable.cpp @@ -89,6 +89,14 @@ std::optional> CoarrayRef::team() const { } } +std::optional> CoarrayRef::notify() const { + if (notify_) { + return notify_.value().value(); + } else { + return std::nullopt; + } +} + CoarrayRef &CoarrayRef::set_stat(Expr &&v) { CHECK(IsVariable(v)); stat_.emplace(std::move(v)); @@ -100,6 +108,11 @@ CoarrayRef &CoarrayRef::set_team(Expr &&v) { return *this; } +CoarrayRef &CoarrayRef::set_notify(Expr &&v) { + notify_.emplace(std::move(v)); + return *this; +} + const Symbol &CoarrayRef::GetFirstSymbol() const { return base().GetFirstSymbol(); } diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp index 1b4d37e9798a9..4b95a3adf052a 100644 --- a/flang/lib/Lower/Support/Utils.cpp 
+++ b/flang/lib/Lower/Support/Utils.cpp @@ -82,7 +82,7 @@ class HashEvaluateExpr { x.cosubscript()) cosubs -= getHashValue(v); return getHashValue(x.base()) * 97u - cosubs + getHashValue(x.stat()) + - 257u + getHashValue(x.team()); + 257u + getHashValue(x.team()) + getHashValue(x.notify()); } static unsigned getHashValue(const Fortran::evaluate::NamedEntity &x) { if (x.IsSymbol()) @@ -341,7 +341,8 @@ class IsEqualEvaluateExpr { const Fortran::evaluate::CoarrayRef &y) { return isEqual(x.base(), y.base()) && isEqual(x.cosubscript(), y.cosubscript()) && - isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()); + isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()) && + isEqual(x.notify(), y.notify()); } static bool isEqual(const Fortran::evaluate::NamedEntity &x, const Fortran::evaluate::NamedEntity &y) { diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index 59fe7d813d96a..ea6a1eada2741 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1212,12 +1212,15 @@ TYPE_CONTEXT_PARSER("image selector"_en_US, // R926 image-selector-spec -> // STAT = stat-variable | TEAM = team-value | -// TEAM_NUMBER = scalar-int-expr +// TEAM_NUMBER = scalar-int-expr | +// NOTIFY = notify-variable TYPE_PARSER(construct(construct( "STAT =" >> scalar(integer(indirect(variable))))) || construct(construct("TEAM =" >> teamValue)) || construct(construct( - "TEAM_NUMBER =" >> scalarIntExpr))) + "TEAM_NUMBER =" >> scalarIntExpr)) || + construct(construct( + "NOTIFY =" >> scalar(indirect(variable))))) // R927 allocate-stmt -> // ALLOCATE ( [type-spec ::] allocation-list [, alloc-opt-list] ) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 84123030195e9..6bb14a43e7b99 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -819,6 +819,7 @@ class UnparseVisitor { Word("TEAM="); } } + void Before(const ImageSelectorSpec::Notify &) { Word("NOTIFY="); } void 
Unparse(const AllocateStmt &x) { // R927 Word("ALLOCATE("); Walk(std::get>(x.t), "::"); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index de407d3b1e125..9a6b3ff3cdc2c 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -855,6 +855,15 @@ void CheckHelper::CheckObjectEntity( messages_.Say( "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, symbol.name(), component.BuildResultDesignatorName()); + } else if (IsNotifyType(derived)) { // C1612 + messages_.Say( + "Variable '%s' with NOTIFY_TYPE must be a coarray"_err_en_US, + symbol.name()); + } else if (auto component{FindNotifyPotentialComponent( // C1611 + *derived, /*ignoreCoarrays=*/true)}) { + messages_.Say( + "Variable '%s' with NOTIFY_TYPE potential component '%s' must be a coarray"_err_en_US, + symbol.name(), component.BuildResultDesignatorName()); } } } @@ -873,6 +882,10 @@ void CheckHelper::CheckObjectEntity( messages_.Say( "An INTENT(OUT) dummy argument may not be, or contain, EVENT_TYPE or LOCK_TYPE"_err_en_US); } + if (IsOrContainsNotifyComponent(symbol)) { // C1613 + messages_.Say( + "An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE"_err_en_US); + } if (IsAssumedSizeArray(symbol)) { // C834 if (type && type->IsPolymorphic()) { messages_.Say( diff --git a/flang/lib/Semantics/dump-expr.cpp b/flang/lib/Semantics/dump-expr.cpp index 66cedab94bfb4..8d354cf65b61e 100644 --- a/flang/lib/Semantics/dump-expr.cpp +++ b/flang/lib/Semantics/dump-expr.cpp @@ -23,6 +23,7 @@ void DumpEvaluateExpr::Show(const evaluate::CoarrayRef &x) { Indent("coarray ref"); Show(x.base()); Show(x.cosubscript()); + Show(x.notify()); Show(x.stat()); Show(x.team()); Outdent(); diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index c8167fd34f666..ac58dfc005f17 100644 --- a/flang/lib/Semantics/expression.cpp +++ 
b/flang/lib/Semantics/expression.cpp @@ -1579,6 +1579,19 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::CoindexedNamedObject &x) { std::get>(x.imageSelector.t)) { common::visit( common::visitors{ + [&](const parser::ImageSelectorSpec::Notify &x) { + Analyze(x.v); + if (const auto *expr{GetExpr(context_, x.v)}) { + if (coarrayRef.notify()) { + Say("coindexed reference has multiple NOTIFY= specifiers"_err_en_US); + } else if (auto dyType{expr->GetType()}; + dyType && IsNotifyType(GetDerivedTypeSpec(*dyType))) { + coarrayRef.set_notify(Expr{*expr}); + } else { + Say("NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV"_err_en_US); + } + } + }, [&](const parser::ImageSelectorSpec::Stat &x) { Analyze(x.v); if (const auto *expr{GetExpr(context_, x.v)}) { diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 8eddd03faa962..cf1e5e7d44565 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -582,6 +582,18 @@ bool IsOrContainsEventOrLockComponent(const Symbol &original) { return false; } +bool IsOrContainsNotifyComponent(const Symbol &original) { + const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; + if (evaluate::IsVariable(symbol)) { + if (const DeclTypeSpec *type{symbol.GetType()}) { + if (const DerivedTypeSpec *derived{type->AsDerived()}) { + return IsNotifyType(derived) || FindNotifyPotentialComponent(*derived); + } + } + } + return false; +} + // Check this symbol suitable as a type-bound procedure - C769 bool CanBeTypeBoundProc(const Symbol &symbol) { if (IsDummy(symbol) || IsProcedurePointer(symbol)) { @@ -1489,6 +1501,32 @@ PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( return iter; } +PotentialComponentIterator::const_iterator FindNotifyPotentialComponent( + const DerivedTypeSpec &derived, bool ignoreCoarrays) { + PotentialComponentIterator potentials{derived}; + auto iter{potentials.begin()}; + for (auto end{potentials.end()}; 
iter != end; ++iter) { + const Symbol &component{*iter}; + if (const auto *object{component.detailsIf()}) { + if (const DeclTypeSpec *type{object->type()}) { + if (IsNotifyType(type->AsDerived())) { + if (!ignoreCoarrays) { + break; // found one + } + auto path{iter.GetComponentPath()}; + path.pop_back(); + if (std::find_if(path.begin(), path.end(), [](const Symbol &sym) { + return evaluate::IsCoarray(sym); + }) == path.end()) { + break; // found one not in a coarray + } + } + } + } + } + return iter; +} + UltimateComponentIterator::const_iterator FindAllocatableUltimateComponent( const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/test/Semantics/coarrays02.f90 b/flang/test/Semantics/coarrays02.f90 index b16e0ccb58797..e866dd89c07ab 100644 --- a/flang/test/Semantics/coarrays02.f90 +++ b/flang/test/Semantics/coarrays02.f90 @@ -16,6 +16,8 @@ program main type(event_type) event !ERROR: Variable 'lock' with EVENT_TYPE or LOCK_TYPE must be a coarray type(lock_type) lock + !ERROR: Variable 'notify' with NOTIFY_TYPE must be a coarray + type(notify_type) notify integer :: local[*] ! 
ok in main end @@ -120,3 +122,18 @@ subroutine s4 !ERROR: Subscripts must appear in a coindexed reference when its base is an array print *, ta(1)%a[1] end + +subroutine s5(a, notify, res) + use iso_fortran_env + type t + type(notify_type) :: a + end type + real, intent(in) :: a[*] + type(event_type), intent(in) :: notify[*] + !ERROR: An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE + type(notify_type), intent(out) :: res[*] + !ERROR: Variable 'bad' with NOTIFY_TYPE potential component '%a' must be a coarray + type(t) :: bad + !ERROR: NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV + print *, a[1, NOTIFY=notify] +end diff --git a/flang/test/Semantics/notifywait03.f90 b/flang/test/Semantics/notifywait03.f90 index 0fc56f66ad32d..a336a7a67669a 100644 --- a/flang/test/Semantics/notifywait03.f90 +++ b/flang/test/Semantics/notifywait03.f90 @@ -10,6 +10,7 @@ program test_notify_wait implicit none ! notify_type variables must be coarrays + !ERROR: Variable 'non_coarray' with NOTIFY_TYPE must be a coarray type(notify_type) :: non_coarray type(notify_type) :: notify_var[*], notify_array(2)[*] From 52cb6e9d49f836b624bd0536734afd7aa4194ca0 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 5 Nov 2025 07:40:37 -0800 Subject: [PATCH 05/61] [ProfCheck][NFC] Make Function argument from branch weight setter optional (#166032) This picks up from #166028, making the `Function` argument optional: most cases don't need to provide it, but in e.g. InstCombine's case, where the instruction (select, branch) is not attached to a function yet, the function needs to be passed explicitly. 
Co-authored-by: Florian Hahn --- llvm/include/llvm/IR/ProfDataUtils.h | 9 +++++---- llvm/lib/IR/IRBuilder.cpp | 3 +-- llvm/lib/IR/ProfDataUtils.cpp | 9 ++++++--- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 3 +-- llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 2 +- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 3 +-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 3 +-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index a0876b169e0b8..a7bcbf010d1bf 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -194,10 +194,11 @@ LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, /// Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch /// weights in the new instruction if the parent function of the original /// instruction has an entry count. This is to not confuse users by injecting -/// profile data into non-profiled functions. -LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, - Function &F, - StringRef PassName); +/// profile data into non-profiled functions. If \p F is nullptr, we will fetch +/// the function from \p I. +LLVM_ABI void +setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, + const Function *F = nullptr); /// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their /// entry counts. 
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 88dbd176e0d3f..95edb2e8e56d8 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1019,8 +1019,7 @@ Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, const Twine &Name) { Value *Ret = CreateSelectFMF(C, True, False, {}, Name); if (auto *SI = dyn_cast(Ret)) { - setExplicitlyUnknownBranchWeightsIfProfiled( - *SI, *SI->getParent()->getParent(), PassName); + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, PassName); } return Ret; } diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index fc2be5188f456..94dbe1f3988b8 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -274,9 +274,12 @@ void llvm::setExplicitlyUnknownBranchWeights(Instruction &I, } void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, - Function &F, - StringRef PassName) { - if (std::optional EC = F.getEntryCount(); + StringRef PassName, + const Function *F) { + F = F ? F : I.getFunction(); + assert(F && "Either pass a instruction attached to a Function, or explicitly " + "pass the Function that it will be attached to"); + if (std::optional EC = F->getEntryCount(); EC && EC->getCount() > 0) setExplicitlyUnknownBranchWeights(I, PassName); } diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 7a95df4b2a47c..b575d76e897d2 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1378,8 +1378,7 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N); // We can't know the precise weights here, as they would depend on the value // distribution of Call->getArgOperand(1). So we just mark it as "unknown". 
- setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, DEBUG_TYPE); Type *IndexTy = DL.getIndexType(Call->getType()); SmallVector Updates; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index d85e4f7590197..9bdd8cb71f7f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -479,7 +479,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr); - setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, DEBUG_TYPE, &F); return Sel; } diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 0577ddbd2353c..7930b38174e49 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -330,8 +330,7 @@ static void buildPartialUnswitchConditionalBranch( HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof) : nullptr); if (!HasBranchWeights) - setExplicitlyUnknownBranchWeightsIfProfiled( - *BR, *BR->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE); } /// Copy a set of loop invariant values, and conditionally branch on them. diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3a3e3ade20212..9a8dbebe5bfba 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5214,8 +5214,7 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, // We don't have any info about this condition. auto *Br = TrueWhenEqual ? 
Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB) : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE); OldTI->eraseFromParent(); From 4334b43c6593f3839d33806131fd36c620390cbe Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 5 Nov 2025 10:43:56 -0500 Subject: [PATCH 06/61] [gn] port bb4ed55acdbc --- .../source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn | 10 ++++++++-- llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn index 9848efef70568..fa99fa8649caf 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn @@ -1,10 +1,16 @@ static_library("CPlusPlus") { output_name = "lldbPluginCPPRuntime" - configs += [ "//llvm/utils/gn/build:lldb_code" ] + configs += [ + "//llvm/utils/gn/build:clang_code", + "//llvm/utils/gn/build:lldb_code", + ] deps = [ "//lldb/source/Core", "//lldb/source/Symbol", "//lldb/source/Target", ] - sources = [ "CPPLanguageRuntime.cpp" ] + sources = [ + "CPPLanguageRuntime.cpp", + "VerboseTrapFrameRecognizer.cpp", + ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn index a863baf912051..783eb96283596 100644 --- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn @@ -105,6 +105,5 @@ static_library("Target") { "UnixSignals.cpp", "UnwindAssembly.cpp", "UnwindLLDB.cpp", - "VerboseTrapFrameRecognizer.cpp", ] } From fa6cc7eadedd61347456f088ef40f6dfdcd751e2 Mon Sep 17 00:00:00 2001 From: Doug Wyatt Date: Wed, 5 Nov 2025 07:45:33 -0800 
Subject: [PATCH 07/61] [Clang] FunctionEffects: ignore (methods of) local CXXRecordDecls. (#166078) In the following example, `Functor::method()` inappropriately triggers a diagnostic that `outer()` is blocking by allocating memory. ``` void outer() [[clang::nonblocking]] { struct Functor { int* ptr; void method() { ptr = new int; } }; } ``` --------- Co-authored-by: Doug Wyatt --- clang/lib/Sema/SemaFunctionEffects.cpp | 8 ++++++++ .../Sema/attr-nonblocking-constraints.cpp | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/clang/lib/Sema/SemaFunctionEffects.cpp b/clang/lib/Sema/SemaFunctionEffects.cpp index 4b63eb7df1054..12cc02965e7d3 100644 --- a/clang/lib/Sema/SemaFunctionEffects.cpp +++ b/clang/lib/Sema/SemaFunctionEffects.cpp @@ -1302,6 +1302,14 @@ class Analyzer { return true; } + bool TraverseCXXRecordDecl(CXXRecordDecl *D) override { + // Completely skip local struct/class/union declarations since their + // methods would otherwise be incorrectly interpreted as part of the + // function we are currently traversing. The initial Sema pass will have + // already recorded any nonblocking methods needing analysis. + return true; + } + bool TraverseConstructorInitializer(CXXCtorInitializer *Init) override { ViolationSite PrevVS = VSite; if (Init->isAnyMemberInitializer()) diff --git a/clang/test/Sema/attr-nonblocking-constraints.cpp b/clang/test/Sema/attr-nonblocking-constraints.cpp index 881e816292d59..012c017798a1f 100644 --- a/clang/test/Sema/attr-nonblocking-constraints.cpp +++ b/clang/test/Sema/attr-nonblocking-constraints.cpp @@ -104,6 +104,25 @@ void nb8c() }; } +void nb8d() [[clang::nonblocking]] +{ + // Blocking methods of a local CXXRecordDecl do not generate diagnostics + // for the outer function. + struct F1 { + void method() { void* ptr = new int; } + }; + + // Skipping the CXXRecordDecl does not skip a following VarDecl. 
+ struct F2 { + F2() { void* ptr = new int; } // expected-note {{constructor cannot be inferred 'nonblocking' because it allocates or deallocates memory}} + } f2; // expected-warning {{function with 'nonblocking' attribute must not call non-'nonblocking' constructor 'nb8d()::F2::F2'}} + + // Nonblocking methods of a local CXXRecordDecl are verified independently. + struct F3 { + void method() [[clang::nonblocking]] { void* ptr = new int; }// expected-warning {{function with 'nonblocking' attribute must not allocate or deallocate memory}} + }; +} + // Make sure template expansions are found and verified. template struct Adder { From d568601d5a0c7c315d8038f4dc24e0ccd97a1ba0 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 5 Nov 2025 07:46:12 -0800 Subject: [PATCH 08/61] [NFC][TableGen] Adopt NamespaceEmitter in DirectiveEmitter (#165600) --- llvm/test/TableGen/directive1.td | 7 ++-- llvm/test/TableGen/directive2.td | 7 ++-- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 36 ++++++------------- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8648651f3d714..5bd7890e0ddd1 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -186,8 +186,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: -// IMPL-NEXT: namespace llvm { -// IMPL-NEXT: namespace tdl { +// IMPL-NEXT: namespace llvm::tdl { // IMPL-EMPTY: // IMPL-NEXT: // Sets for dira // IMPL-EMPTY: @@ -204,8 +203,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL-EMPTY: // IMPL-NEXT: static requiredClauses_TDLD_dira { // IMPL-NEXT: }; -// IMPL-NEXT: } // namespace tdl -// IMPL-NEXT: } // namespace llvm +// IMPL-EMPTY: +// IMPL-NEXT: } // namespace llvm::tdl // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: diff --git 
a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index 96022d7647440..eaaf82ddaaf41 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -159,8 +159,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: -// IMPL-NEXT: namespace llvm { -// IMPL-NEXT: namespace tdl { +// IMPL-NEXT: namespace llvm::tdl { // IMPL-EMPTY: // IMPL-NEXT: // Sets for dira // IMPL-EMPTY: @@ -177,8 +176,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL-EMPTY: // IMPL-NEXT: static requiredClauses_TDLD_dira { // IMPL-NEXT: }; -// IMPL-NEXT: } // namespace tdl -// IMPL-NEXT: } // namespace llvm +// IMPL-EMPTY: +// IMPL-NEXT: } // namespace llvm::tdl // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index d33bf45595e2e..0bb743dc8a7f5 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -359,7 +359,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { OS << " static constexpr bool is_iterable = true;\n"; OS << "};\n"; } - LlvmNS.close(); } // Given a list of spellings (for a given clause/directive), order them @@ -931,27 +930,20 @@ static void generateClauseSet(ArrayRef VerClauses, // Generate an enum set for the 4 kinds of clauses linked to a directive. static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, Frontend FE, raw_ostream &OS) { + IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() + + "_DIRECTIVE_CLAUSE_SETS"); - std::string IfDefName{"GEN_"}; - IfDefName += getFESpelling(FE).upper(); - IfDefName += "_DIRECTIVE_CLAUSE_SETS"; - IfDefEmitter Scope(OS, IfDefName); - - StringRef Namespace = - getFESpelling(FE == Frontend::Flang ? 
Frontend::LLVM : FE); + std::string Namespace = + getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE).str(); // The namespace has to be different for clang vs flang, as 2 structs with the // same name but different layout is UB. So just put the 'clang' on in the // clang namespace. - OS << "namespace " << Namespace << " {\n"; - - // Open namespaces defined in the directive language. - SmallVector Namespaces; - SplitString(DirLang.getCppNamespace(), Namespaces, "::"); - for (auto Ns : Namespaces) - OS << "namespace " << Ns << " {\n"; + // Additionally, open namespaces defined in the directive language. + if (!DirLang.getCppNamespace().empty()) + Namespace += "::" + DirLang.getCppNamespace().str(); + NamespaceEmitter NS(OS, Namespace); for (const Directive Dir : DirLang.getDirectives()) { - OS << "\n"; OS << "// Sets for " << Dir.getSpellingForIdentifier() << "\n"; generateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir, @@ -963,12 +955,6 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, generateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir, DirLang, FE); } - - // Closing namespaces - for (auto Ns : reverse(Namespaces)) - OS << "} // namespace " << Ns << "\n"; - - OS << "} // namespace " << Namespace << "\n"; } // Generate a map of directive (key) with DirectiveClauses struct as values. @@ -976,10 +962,8 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, // allowances (allowed, allowed once, allowed exclusive and required). 
static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang, Frontend FE, raw_ostream &OS) { - std::string IfDefName{"GEN_"}; - IfDefName += getFESpelling(FE).upper(); - IfDefName += "_DIRECTIVE_CLAUSE_MAP"; - IfDefEmitter Scope(OS, IfDefName); + IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() + + "_DIRECTIVE_CLAUSE_MAP"); OS << "{\n"; From 3641e269b0fdf3614b6a2a068678d8431204a489 Mon Sep 17 00:00:00 2001 From: nerix Date: Wed, 5 Nov 2025 16:48:24 +0100 Subject: [PATCH 09/61] [MsDemangle] Read entire chain of target names in special tables (#155630) When there's a deep inheritance hierarchy of multiple C++ classes (see below), then the mangled name of a VFTable can include multiple key nodes in the target name. For example, in the following code, MSVC will generate mangled names for the VFTables that have up to three key classes in the context.
Code ```cpp class Base1 { virtual void a() {}; }; class Base2 { virtual void b() {} }; class Ind1 : public Base1 {}; class Ind2 : public Base1 {}; class A : public Ind1, public Ind2 {}; class Ind3 : public A {}; class Ind4 : public A {}; class B : public Ind3, public Ind4 {}; class Ind5 : public B {}; class Ind6 : public B {}; class C : public Ind5, public Ind6 {}; int main() { auto i = new C; } ```
This will include `??_7C@@6BInd1@@Ind4@@Ind5@@@` (and every other combination). Microsoft's undname will demangle this to "const C::\`vftable'{for \`Ind1's \`Ind4's \`Ind5'}". Previously, LLVM would demangle this to "const C::\`vftable'{for \`Ind1'}". With this PR, the output of LLVM's undname will be identical to Microsoft's version. This changes `SpecialTableSymbolNode::TargetName` to a node array which contains each key from the name. Unlike namespaces, these keys are not in reverse order - they are in the same order as in the mangled name. --- .../llvm/Demangle/MicrosoftDemangleNodes.h | 2 +- llvm/lib/Demangle/MicrosoftDemangle.cpp | 26 +++++++++++++++++-- llvm/lib/Demangle/MicrosoftDemangleNodes.cpp | 4 +-- llvm/test/Demangle/ms-operators.test | 15 +++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 155cfe8dd3a98..711aa70a4a8d3 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -708,7 +708,7 @@ struct DEMANGLE_ABI SpecialTableSymbolNode : public SymbolNode { return N->kind() == NodeKind::SpecialTableSymbol; } - QualifiedNameNode *TargetName = nullptr; + NodeArrayNode *TargetNames = nullptr; Qualifiers Quals = Qualifiers::Q_None; }; diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index b22928be3be50..250d382998982 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -15,6 +15,8 @@ #include "llvm/Demangle/MicrosoftDemangle.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/MicrosoftDemangleNodes.h" @@ -277,6 +279,15 @@ demanglePointerCVQualifiers(std::string_view &MangledName) { DEMANGLE_UNREACHABLE; } +static NodeArrayNode 
*smallVecToNodeArray(ArenaAllocator &Arena, + ArrayRef Vec) { + NodeArrayNode *Arr = Arena.alloc(); + Arr->Count = Vec.size(); + Arr->Nodes = Arena.allocArray(Vec.size()); + std::memcpy(Arr->Nodes, Vec.data(), Vec.size() * sizeof(Node *)); + return Arr; +} + std::string_view Demangler::copyString(std::string_view Borrowed) { char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); // This is not a micro-optimization, it avoids UB, should Borrowed be an null @@ -323,8 +334,19 @@ Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName, } std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); - if (!consumeFront(MangledName, '@')) - STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); + + SmallVector TargetNames; + while (!consumeFront(MangledName, '@')) { + QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName); + if (Error) + return nullptr; + assert(QN); + TargetNames.push_back(QN); + } + + if (!TargetNames.empty()) + STSN->TargetNames = smallVecToNodeArray(Arena, TargetNames); + return STSN; } diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index 61e4961c714bc..17c6aab500049 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -662,9 +662,9 @@ void VcallThunkIdentifierNode::output(OutputBuffer &OB, void SpecialTableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const { outputQualifiers(OB, Quals, false, true); Name->output(OB, Flags); - if (TargetName) { + if (TargetNames) { OB << "{for `"; - TargetName->output(OB, Flags); + TargetNames->output(OB, Flags, "'s `"); OB << "'}"; } } diff --git a/llvm/test/Demangle/ms-operators.test b/llvm/test/Demangle/ms-operators.test index b940488786631..cafa1ae3c0663 100644 --- a/llvm/test/Demangle/ms-operators.test +++ b/llvm/test/Demangle/ms-operators.test @@ -143,9 +143,24 @@ ??_7A@B@@6BC@D@@@ ; CHECK: const B::A::`vftable'{for `D::C'} 
+??_7A@B@@6BC@D@@E@F@@@ +; CHECK: const B::A::`vftable'{for `D::C's `F::E'} + +??_7A@B@@6BC@D@@E@F@@G@H@@@ +; CHECK: const B::A::`vftable'{for `D::C's `F::E's `H::G'} + ??_8Middle2@@7B@ ; CHECK: const Middle2::`vbtable' +??_7A@@6BB@@@ +; CHECK: const A::`vftable'{for `B'} + +??_7A@@6BB@@C@@@ +; CHECK: const A::`vftable'{for `B's `C'} + +??_7A@@6BB@@C@@D@@@ +; CHECK: const A::`vftable'{for `B's `C's `D'} + ??_9Base@@$B7AA ; CHECK: [thunk]: __cdecl Base::`vcall'{8, {flat}} From ff108f7486fafb13f330cec324f59a04442d01d4 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 5 Nov 2025 08:02:55 -0800 Subject: [PATCH 10/61] Fix failures introduced in #166032 (#166574) --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 2 +- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 6412949948c07..0b55c03a46747 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1301,7 +1301,7 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop( // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is // hard to predict precise branch weigths we mark the branch as "unknown" // (50/50) to prevent misleading optimizations. - setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Loaded; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 7930b38174e49..0f3e66476f055 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -388,8 +388,7 @@ static void buildPartialInvariantUnswitchConditionalBranch( IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? 
&NormalSucc : &UnswitchedSucc, ProfData); if (!ProfData) - setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE); } /// Rewrite the PHI nodes in an unswitched loop exit basic block. @@ -3203,8 +3202,7 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, auto *InvariantBr = Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); // We don't know anything about the relation between the limits. - setExplicitlyUnknownBranchWeightsIfProfiled( - *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*InvariantBr, DEBUG_TYPE); Builder.SetInsertPoint(CheckBlock); Builder.CreateCondBr( From a796d1836930bc721686a728852b820a2ea4de92 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:31 +0000 Subject: [PATCH 11/61] [gn build] Port 370058777be2 --- llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn index 1a890f6733597..a234d2be67f66 100644 --- a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn @@ -12,7 +12,6 @@ static_library("BinaryFormat") { "ELF.cpp", "MachO.cpp", "Magic.cpp", - "Minidump.cpp", "MsgPackDocument.cpp", "MsgPackDocumentYAML.cpp", "MsgPackReader.cpp", From ef6947b098e8086ac0d72086b1c63cb8d82ba797 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:32 +0000 Subject: [PATCH 12/61] [gn build] Port 3ebed51e997b --- llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn index 065fc6cdd74a3..bd8d9610c2a4a 100644 --- 
a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn @@ -17,6 +17,7 @@ unittest("ClangCodeGenTests") { "BufferSourceTest.cpp", "CheckTargetFeaturesTest.cpp", "CodeGenExternalTest.cpp", + "DemangleTrapReasonInDebugInfo.cpp", "TBAAMetadataTest.cpp", ] } From 9bb67f88ed287ebf386ed7579d05abd5d214e52a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:33 +0000 Subject: [PATCH 13/61] [gn build] Port 51d0f6d6e172 --- .../gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1 + .../gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index 2f84999621e1b..3c3fdf7e16885 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -82,6 +82,7 @@ static_library("bugprone") { "SmartPtrArrayMismatchCheck.cpp", "SpuriouslyWakeUpFunctionsCheck.cpp", "StandaloneEmptyCheck.cpp", + "StdNamespaceModificationCheck.cpp", "StringConstructorCheck.cpp", "StringIntegerAssignmentCheck.cpp", "StringLiteralWithEmbeddedNulCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn index ec642b6afad66..1eae289143b5b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn @@ -16,7 +16,6 @@ static_library("cert") { ] sources = [ "CERTTidyModule.cpp", - "DontModifyStdNamespaceCheck.cpp", "FloatLoopCounter.cpp", "LimitedRandomnessCheck.cpp", "MutatingCopyCheck.cpp", From 3bf0ce15f80ddffe0fff1a52500c79b9bcd011e4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:34 +0000 
Subject: [PATCH 14/61] [gn build] Port 718a3b268fcf --- llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn index 393309ee39bfe..a261f2866be47 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn @@ -12,7 +12,7 @@ static_library("Passes") { "//llvm/utils/gn/build/libs/pthread", ] sources = [ - "ADRRelaxationPass.cpp", + "AArch64RelaxationPass.cpp", "Aligner.cpp", "AllocCombiner.cpp", "AsmDump.cpp", From ce5dac67ffddfa9b9f141b35a4bf05bdee970676 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:35 +0000 Subject: [PATCH 15/61] [gn build] Port dd14eb8242d7 --- llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index a1f5b475e2096..ad72c0069237d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -151,6 +151,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVMoveMerger.cpp", "RISCVOptWInstrs.cpp", "RISCVPostRAExpandPseudoInsts.cpp", + "RISCVPromoteConstant.cpp", "RISCVPushPopOptimizer.cpp", "RISCVRedundantCopyElimination.cpp", "RISCVRegisterInfo.cpp", From bb367c14aea99ab744406f3e3a700186fd3f1ad6 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 5 Nov 2025 08:23:22 -0800 Subject: [PATCH 16/61] [polly] Fix unused variable warnings This patch fixes: polly/lib/Transform/ScheduleOptimizer.cpp:935:17: error: unused variable 'File' [-Werror,-Wunused-variable] polly/lib/Transform/ScheduleOptimizer.cpp:936:9: error: unused variable 'Line' [-Werror,-Wunused-variable] polly/lib/Transform/ScheduleOptimizer.cpp:937:17: error: unused variable 'Msg' [-Werror,-Wunused-variable] --- 
polly/lib/Transform/ScheduleOptimizer.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index cb08397c201f2..f01d3decd9a1c 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -932,13 +932,14 @@ static void runIslScheduleOptimizer( POLLY_DEBUG(dbgs() << "Schedule optimizer calculation exceeds ISL quota\n"); return; } else if (isl_ctx_last_error(Ctx) != isl_error_none) { - const char *File = isl_ctx_last_error_file(Ctx); - int Line = isl_ctx_last_error_line(Ctx); - const char *Msg = isl_ctx_last_error_msg(Ctx); - POLLY_DEBUG( - dbgs() - << "ISL reported an error during the computation of a new schedule at " - << File << ":" << Line << ": " << Msg); + POLLY_DEBUG({ + const char *File = isl_ctx_last_error_file(Ctx); + int Line = isl_ctx_last_error_line(Ctx); + const char *Msg = isl_ctx_last_error_msg(Ctx); + dbgs() << "ISL reported an error during the computation of a new " + "schedule at " + << File << ":" << Line << ": " << Msg; + }); isl_ctx_reset_error(Ctx); return; } else if (Schedule.is_null()) { From 0b5a00aab73967026292d0c1c84b87fabdcc2648 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 5 Nov 2025 08:40:55 -0800 Subject: [PATCH 17/61] [CI][NFC] Reformat Python Files in .ci directory Reviewers: Pull Request: https://github.com/llvm/llvm-project/pull/166587 --- .ci/generate_test_report_github.py | 1 + .ci/generate_test_report_lib.py | 1 + 2 files changed, 2 insertions(+) diff --git a/.ci/generate_test_report_github.py b/.ci/generate_test_report_github.py index 6785e82f3440b..08387de817467 100644 --- a/.ci/generate_test_report_github.py +++ b/.ci/generate_test_report_github.py @@ -8,6 +8,7 @@ import generate_test_report_lib + def compute_platform_title() -> str: logo = ":window:" if platform.system() == "Windows" else ":penguin:" # On Linux the machine value is x86_64 on Windows it 
is AMD64. diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py index 7820fbda803d7..0c025c561f6f7 100644 --- a/.ci/generate_test_report_lib.py +++ b/.ci/generate_test_report_lib.py @@ -100,6 +100,7 @@ def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]: ) return output + def get_failures(junit_objects) -> dict[str, list[tuple[str, str]]]: failures = {} for results in junit_objects: From 99334f74ae1cd57634c87975e32c797bed3865ce Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 5 Nov 2025 08:41:12 -0800 Subject: [PATCH 18/61] [ADT] Add static_assert to llvm::to_address for function types (#166505) This patch aligns llvm::to_address with C++20 std::to_address by adding a static_assert to prevent instantiation with function types. The C++20 standard says that std::to_address is ill-formed on a function type. --- llvm/include/llvm/ADT/STLForwardCompat.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h index ad94cdede9288..b975a403cd042 100644 --- a/llvm/include/llvm/ADT/STLForwardCompat.h +++ b/llvm/include/llvm/ADT/STLForwardCompat.h @@ -142,7 +142,10 @@ struct identity // NOLINT(readability-identifier-naming) /// The std::pointer_traits<>::to_address(p) variations of these overloads has /// not been implemented. 
template auto to_address(const Ptr &P) { return P.operator->(); } -template constexpr T *to_address(T *P) { return P; } +template constexpr T *to_address(T *P) { + static_assert(!std::is_function_v); + return P; +} //===----------------------------------------------------------------------===// // Features from C++23 From ab02808c66b0e14c35356c378399ca04a9bc7271 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 5 Nov 2025 08:41:24 -0800 Subject: [PATCH 19/61] [Support] Simplify minIntN and isUIntN (NFC) (#166506) --- llvm/include/llvm/Support/MathExtras.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 9bbb8a2a30541..0a253efc2abcb 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -225,7 +225,7 @@ inline constexpr int64_t minIntN(int64_t N) { if (N == 0) return 0; - return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); + return UINT64_MAX << (N - 1); } /// Gets the maximum value for a N-bit signed integer. @@ -241,7 +241,7 @@ inline constexpr int64_t maxIntN(int64_t N) { /// Checks if an unsigned integer fits into the given (dynamic) bit width. inline constexpr bool isUIntN(unsigned N, uint64_t x) { - return N >= 64 || x <= maxUIntN(N); + return N >= 64 || (x >> N) == 0; } /// Checks if an signed integer fits into the given (dynamic) bit width. From 0b29c3c1a1bd3d57961dbba7a39b8dae29b4ecf2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 5 Nov 2025 08:41:32 -0800 Subject: [PATCH 20/61] [Hexagon] Remove redundant declarations (NFC) (#166507) These two functions are declared in Hexagon.h. Identified with readability-redundant-declaration.
--- llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index 479ac90b7d526..f29a739cb5c07 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -104,13 +104,6 @@ const std::map QFPInstMap{ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; } // namespace -namespace llvm { - -FunctionPass *createHexagonQFPOptimizer(); -void initializeHexagonQFPOptimizerPass(PassRegistry &); - -} // namespace llvm - namespace { struct HexagonQFPOptimizer : public MachineFunctionPass { From aea75d059f24b4f191d7601f68d61b28f78c4d4d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 5 Nov 2025 08:41:39 -0800 Subject: [PATCH 21/61] [ObjectYAML] Remove redundant declarations (NFC) (#166508) In C++17, static constexpr members are implicitly inline, so they no longer require an out-of-line definition. Identified with readability-redundant-declaration. 
--- llvm/lib/ObjectYAML/ELFYAML.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index e5e5fc20728e8..29f291614ffc6 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -37,8 +37,6 @@ unsigned Object::getMachine() const { return *Header.Machine; return llvm::ELF::EM_NONE; } - -constexpr StringRef SectionHeaderTable::TypeStr; } // namespace ELFYAML namespace yaml { From d7c1df38b99bd3ab01b50cfe5fb6b0b2e5044091 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 5 Nov 2025 08:41:47 -0800 Subject: [PATCH 22/61] [llvm] Proofread GoldPlugin.rst (#166509) --- llvm/docs/GoldPlugin.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/GoldPlugin.rst b/llvm/docs/GoldPlugin.rst index 07d2fc203eba5..606f9e0820e60 100644 --- a/llvm/docs/GoldPlugin.rst +++ b/llvm/docs/GoldPlugin.rst @@ -83,7 +83,7 @@ which is why you otherwise need gold to be the installed system linker in your path. ``ar`` and ``nm`` also accept the ``-plugin`` option and it's possible to -to install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup. +install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup. If you built your own gold, be sure to install the ``ar`` and ``nm-new`` you built to ``/usr/bin``. @@ -143,7 +143,7 @@ Quickstart for using LTO with autotooled projects ================================================= Once your system ``ld``, ``ar``, and ``nm`` all support LLVM bitcode, -everything is in place for an easy to use LTO build of autotooled projects: +everything is in place for an easy-to-use LTO build of autotooled projects: * Follow the instructions :ref:`on how to build LLVMgold.so `. 
From 6fec104b45734034f3772747e5adb47b1f7ee658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Wed, 5 Nov 2025 17:42:44 +0100 Subject: [PATCH 23/61] [AMDGPU] Enable typechecks for __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16 (#166547) We didn't remove the `t` for this builtin in the past due to not being sure if we should use `float16/half`. This patch doesn't fix the _Float16/half question, I'll address that in a separate patch later (after discussing the options on our weekly meeting). At the moment we maintain the `h` for this builtin (which is likely not what we want for HIP). --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +- .../test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 36cb527a9c806..2b6fcb1fd479b 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -180,7 +180,7 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "", "atomic-fadd-rtn-insts") -TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "", "atomic-buffer-global-pk-add-f16-insts") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32") diff --git a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip index 8ee64d486f4f4..fea86162c801d 100644 --- 
a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip +++ b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip @@ -14,5 +14,9 @@ __device__ void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float __device__ void test_raw_ptr_atomics_err(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) { i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}} f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}} - v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0, 4); + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}} +} + +__device__ void test_raw_ptr_atomics_f16_retty(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) { + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); } From 3154a841be807943fc83604ab8b2d9ecf300ac21 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 5 Nov 2025 16:44:58 +0000 Subject: [PATCH 24/61] [AMDGPU] Autogenerate R600 packetizer checks (#166570) --- llvm/test/CodeGen/AMDGPU/packetizer.ll | 52 ++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/packetizer.ll b/llvm/test/CodeGen/AMDGPU/packetizer.ll index aab035f811434..b9bf13886d366 100644 --- a/llvm/test/CodeGen/AMDGPU/packetizer.ll +++ b/llvm/test/CodeGen/AMDGPU/packetizer.ll @@ -1,13 +1,49 @@ -; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s -; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s - -; CHECK: {{^}}test: -; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X -; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y -; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z -; CHECK: BIT_ALIGN_INT * 
T{{[0-9]}}.W +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s -check-prefix=R600 +; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s -check-prefix=CM define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) { +; R600-LABEL: test: +; R600: ; %bb.0: ; %entry +; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: ADD_INT T0.Y, KC0[3].X, 1, +; R600-NEXT: ADD_INT T0.Z, KC0[3].Y, 1, +; R600-NEXT: ADD_INT T0.W, KC0[2].Z, 1, +; R600-NEXT: ADD_INT * T1.W, KC0[2].W, 1, +; R600-NEXT: BIT_ALIGN_INT T0.X, PS, PS, KC0[3].Z, +; R600-NEXT: BIT_ALIGN_INT T1.Y, PV.W, PV.W, KC0[3].Z, +; R600-NEXT: BIT_ALIGN_INT T0.Z, PV.Z, PV.Z, KC0[3].Z, +; R600-NEXT: BIT_ALIGN_INT * T0.W, PV.Y, PV.Y, KC0[3].Z, +; R600-NEXT: OR_INT T0.W, PV.W, PV.Z, +; R600-NEXT: OR_INT * T1.W, PV.Y, PV.X, +; R600-NEXT: OR_INT T0.X, PS, PV.W, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: test: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: ADD_INT T0.X, KC0[3].X, 1, +; CM-NEXT: ADD_INT T0.Y, KC0[3].Y, 1, +; CM-NEXT: ADD_INT T0.Z, KC0[2].Z, 1, +; CM-NEXT: ADD_INT * T0.W, KC0[2].W, 1, +; CM-NEXT: BIT_ALIGN_INT T1.X, PV.W, PV.W, KC0[3].Z, +; CM-NEXT: BIT_ALIGN_INT T1.Y, PV.Z, PV.Z, KC0[3].Z, +; CM-NEXT: BIT_ALIGN_INT T0.Z, PV.Y, PV.Y, KC0[3].Z, +; CM-NEXT: BIT_ALIGN_INT * T0.W, PV.X, PV.X, KC0[3].Z, +; CM-NEXT: OR_INT T0.Z, PV.W, PV.Z, +; CM-NEXT: OR_INT * T0.W, PV.Y, PV.X, +; CM-NEXT: OR_INT * T0.X, PV.W, PV.Z, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 
0(0.000000e+00) entry: %shl = sub i32 32, %e %x = add i32 %x_arg, 1 From d4e3a2327da11e07961117b3b443de24a8d80095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 5 Nov 2025 18:45:33 +0200 Subject: [PATCH 25/61] [clang] [doc] Document that the ms_abi attribute works on aarch64 too (#166373) Since 022e782e75766e9dd98b9e18572129cd313f3ab5 (2017) this attribute has an effect on both aarch64 and x86_64; update the docs to reflect this. --- clang/include/clang/Basic/AttrDocs.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2fdd041c1b46e..1be9a96aa44de 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3450,9 +3450,9 @@ Mac, and BSD. This attribute has no effect on other targets. def MSABIDocs : Documentation { let Category = DocCatCallingConvs; let Content = [{ -On non-Windows x86_64 targets, this attribute changes the calling convention of -a function to match the default convention used on Windows x86_64. This -attribute has no effect on Windows targets or non-x86_64 targets. +On non-Windows x86_64 and aarch64 targets, this attribute changes the calling convention of +a function to match the default convention used on Windows. This +attribute has no effect on Windows targets or non-x86_64, non-aarch64 targets. }]; } From 95c87505255032c1cfcd4091e1e114865f62be9a Mon Sep 17 00:00:00 2001 From: Joshua Rodriguez Date: Wed, 5 Nov 2025 16:53:45 +0000 Subject: [PATCH 26/61] [AArch64][GlobalISel] Added pmull/pmull64 intrinsic support (#165740) GISel no longer falls back onto SDAG when attempting to lower the pmull and pmull64 intrinsics. 
--- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 + .../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 + .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 1 + llvm/test/CodeGen/AArch64/aarch64-smull.ll | 84 ++++++--- .../test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 55 +++--- llvm/test/CodeGen/AArch64/arm64-vmul.ll | 161 +++++++++++------- .../CodeGen/AArch64/highextractbitcast.ll | 26 +-- 7 files changed, 219 insertions(+), 118 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 30b7b03f7a69a..52b216c7fe0f0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_PMULL : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src1, type1:$src2); + let hasSideEffects = 0; +} + def G_UADDLP : AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); @@ -273,6 +279,7 @@ def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5f93847bc680e..038ad77ae69b2 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1809,6 +1809,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerBinOp(TargetOpcode::G_FMAXNUM); case Intrinsic::aarch64_neon_fminnm: return LowerBinOp(TargetOpcode::G_FMINNUM); + case Intrinsic::aarch64_neon_pmull: + case Intrinsic::aarch64_neon_pmull64: + return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_smull: return LowerBinOp(AArch64::G_SMULL); case Intrinsic::aarch64_neon_umull: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 6d2d70511e894..6b920f05227ad 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -560,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case TargetOpcode::G_FCMP: case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: + case AArch64::G_PMULL: return true; case TargetOpcode::G_INTRINSIC: switch (cast(MI).getIntrinsicID()) { diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 0cd885e599817..e85e808921c87 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -1,10 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: @@ -1832,14 +1829,33 @@ entry: } define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; 
CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { } define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: add v0.8h, v3.8h, v0.8h -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; 
CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q2, q3, [x1] +; CHECK-GI-NEXT: mov d4, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v3.8b, v3.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b +; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 2a8b3ce2ae10b..8cb319b2c3368 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -1,11 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64 +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu 
-mattr=+neon,+aes -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 @@ -2721,14 +2716,24 @@ entry: } define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { -; CHECK-LABEL: test_vmull_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 @@ -2736,12 +2741,22 @@ entry: } define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { -; CHECK-LABEL: test_vmull_high_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_high_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_high_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: 
%0 = extractelement <2 x i64> %a, i32 1 %1 = extractelement <2 x i64> %b, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index e6df9f2fb2c56..90abc7d389c13 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -2,44 +2,35 @@ ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict -; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64 -; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for test_pmull_high_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64 +; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: @@ -2895,11 +2886,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { } define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) { -; CHECK-LABEL: pmull_from_extract_dup_high: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.16b, w0 -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_dup_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v1.16b, w0 +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_dup_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v1.8b, w0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> @@ -2924,12 +2922,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) } define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x 
i8> %rhs) { -; CHECK-LABEL: pmull_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup v1.16b, v1.b[0] -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: dup v1.16b, v1.b[0] +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: dup v1.8b, v1.b[0] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> @@ -3245,21 +3251,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { -; CHECK-LABEL: test_pmull_high_64: -; CHECK: // %bb.0: -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_high_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_64: +; 
CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) @@ -3267,13 +3287,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { } define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_commutable_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: add v0.16b, v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_commutable_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_commutable_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: ret %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l) %3 = add <16 x i8> %1, %2 diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll index df4889b6f09de..bd6c168ce8776 100644 --- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll +++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll @@ -1,10 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes CHECK,CHECK-LE ; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE -; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel 
-global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64 +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) @@ -521,12 +518,12 @@ entry: } define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { -; CHECK-LABEL: test_pmull_high_p8_128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: fmov d1, x1 -; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_pmull_high_p8_128: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: fmov d0, x3 +; CHECK-LE-NEXT: fmov d1, x1 +; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b +; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test_pmull_high_p8_128: ; CHECK-BE: // %bb.0: // %entry @@ -538,6 +535,15 @@ define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_p8_128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov v0.d[0], x0 +; CHECK-GI-NEXT: mov v1.d[0], x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: mov v1.d[1], x3 +; CHECK-GI-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: ret entry: %a = bitcast i128 %aa to <16 x i8> %b = bitcast i128 %bb to <16 x i8> From 0b72899f6db93dab140415e800130c7c82c255b1 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 5 Nov 2025 16:59:55 +0000 Subject: [PATCH 27/61] [libc][math] Refactor the `math_errhandling` macro definition (#166350) This patch refactors the logic to define each component of the `math_errhandling` macro. 
It assumes that math error handling is supported by the target and the C library unless otherwise disabled in the preprocessor logic. In addition to the refactoring, the support for error handling via exceptions is explicitly disabled for Arm targets with no FPU, that is, where `__ARM_FP` is not defined. This is because LLVM libc does not provide a floating-point environment for Arm no-FP configurations (or at least one with support for FP exceptions). --- libc/include/llvm-libc-macros/math-macros.h | 31 ++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h index 6697ce5b03851..e1b12e3010fe9 100644 --- a/libc/include/llvm-libc-macros/math-macros.h +++ b/libc/include/llvm-libc-macros/math-macros.h @@ -42,14 +42,37 @@ #define FP_LLOGBNAN LONG_MAX #endif -#if defined(__NVPTX__) || defined(__AMDGPU__) || defined(__FAST_MATH__) -#define math_errhandling 0 -#elif defined(__NO_MATH_ERRNO__) -#define math_errhandling (MATH_ERREXCEPT) +// Math error handling. Target support is assumed to be existent unless +// explicitly disabled. 
+#if defined(__NVPTX__) || defined(__AMDGPU__) || defined(__FAST_MATH__) || \ + defined(__NO_MATH_ERRNO__) +#define __LIBC_SUPPORTS_MATH_ERRNO 0 +#else +#define __LIBC_SUPPORTS_MATH_ERRNO 1 +#endif + +#if defined(__FAST_MATH__) || \ + ((defined(__arm__) || defined(_M_ARM) || defined(__thumb__) || \ + defined(__aarch64__) || defined(_M_ARM64)) && \ + !defined(__ARM_FP)) +#define __LIBC_SUPPORTS_MATH_ERREXCEPT 0 #else +#define __LIBC_SUPPORTS_MATH_ERREXCEPT 1 +#endif + +#if __LIBC_SUPPORTS_MATH_ERRNO && __LIBC_SUPPORTS_MATH_ERREXCEPT #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT) +#elif __LIBC_SUPPORTS_MATH_ERRNO +#define math_errhandling (MATH_ERRNO) +#elif __LIBC_SUPPORTS_MATH_ERREXCEPT +#define math_errhandling (MATH_ERREXCEPT) +#else +#define math_errhandling 0 #endif +#undef __LIBC_SUPPORTS_MATH_ERRNO +#undef __LIBC_SUPPORTS_MATH_ERREXCEPT + // POSIX math constants // https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/math.h.html #define M_E (__extension__ 0x1.5bf0a8b145769p1) From 9b1719efa063b78a996d837b8b4bcb11ddcffcf8 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 5 Nov 2025 17:04:57 +0000 Subject: [PATCH 28/61] [lldb] Mark single-argument SourceLanguage constructors explicit (#166527) This avoids unintentional comparisons between `SourceLanguage` and `LanguageType`. Also marks `operator bool` explicit so we don't implicitly convert to bool. 
--- lldb/include/lldb/lldb-private-types.h | 11 ++++++++--- lldb/source/Breakpoint/BreakpointLocation.cpp | 2 +- lldb/source/Commands/CommandObjectDWIMPrint.cpp | 13 +++++++------ lldb/source/Expression/UserExpression.cpp | 2 +- .../Clang/ClangExpressionParser.cpp | 2 +- lldb/source/Target/StackFrame.cpp | 6 +++--- lldb/source/Target/Target.cpp | 2 +- 7 files changed, 22 insertions(+), 16 deletions(-) diff --git a/lldb/include/lldb/lldb-private-types.h b/lldb/include/lldb/lldb-private-types.h index b82a2b8aa0574..185467e91bf62 100644 --- a/lldb/include/lldb/lldb-private-types.h +++ b/lldb/include/lldb/lldb-private-types.h @@ -102,13 +102,18 @@ struct RegisterSet { /// A type-erased pair of llvm::dwarf::SourceLanguageName and version. struct SourceLanguage { SourceLanguage() = default; - SourceLanguage(lldb::LanguageType language_type); + explicit SourceLanguage(lldb::LanguageType language_type); + SourceLanguage(uint16_t name, uint32_t version) : name(name), version(version) {} - SourceLanguage(std::optional> name_vers) + + explicit SourceLanguage( + std::optional> name_vers) : name(name_vers ? name_vers->first : 0), version(name_vers ? 
name_vers->second : 0) {} - operator bool() const { return name > 0; } + + explicit operator bool() const { return name > 0; } + lldb::LanguageType AsLanguageType() const; llvm::StringRef GetDescription() const; bool IsC() const; diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index f25209c15e007..25285beb7ffd5 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -251,7 +251,7 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, } m_user_expression_sp.reset(GetTarget().GetUserExpressionForLanguage( - condition.GetText(), llvm::StringRef(), language, + condition.GetText(), llvm::StringRef(), SourceLanguage{language}, Expression::eResultTypeAny, EvaluateExpressionOptions(), nullptr, error)); if (error.Fail()) { diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 0d9eb45732161..40f00c90bbbfb 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -95,9 +95,9 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, StackFrame *frame = m_exe_ctx.GetFramePtr(); // Either the language was explicitly specified, or we check the frame. - lldb::LanguageType language = m_expr_options.language; - if (language == lldb::eLanguageTypeUnknown && frame) - language = frame->GuessLanguage().AsLanguageType(); + SourceLanguage language{m_expr_options.language}; + if (!language && frame) + language = frame->GuessLanguage(); // Add a hint if object description was requested, but no description // function was implemented. 
@@ -119,8 +119,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, "^<\\S+: 0x[[:xdigit:]]{5,}>\\s*$"); if (GetDebugger().GetShowDontUsePoHint() && target_ptr && - (language == lldb::eLanguageTypeSwift || - language == lldb::eLanguageTypeObjC) && + (language.AsLanguageType() == lldb::eLanguageTypeSwift || + language.IsObjC()) && std::regex_match(output.data(), swift_class_regex)) { result.AppendNote( @@ -193,7 +193,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, // Second, try `expr` as a persistent variable. if (expr.starts_with("$")) - if (auto *state = target.GetPersistentExpressionStateForLanguage(language)) + if (auto *state = target.GetPersistentExpressionStateForLanguage( + language.AsLanguageType())) if (auto var_sp = state->GetVariable(expr)) if (auto valobj_sp = var_sp->GetValueObject()) { dump_val_object(*valobj_sp); diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp index af4b477660eeb..5563eba21777e 100644 --- a/lldb/source/Expression/UserExpression.cpp +++ b/lldb/source/Expression/UserExpression.cpp @@ -246,7 +246,7 @@ UserExpression::Evaluate(ExecutionContext &exe_ctx, // language in the target's properties if specified, else default to the // langage for the frame. 
if (!language) { - if (target->GetLanguage() != lldb::eLanguageTypeUnknown) + if (target->GetLanguage()) language = target->GetLanguage(); else if (StackFrame *frame = exe_ctx.GetFramePtr()) language = frame->GetLanguage(); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 990074566be7e..6bab880b4d521 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -1502,7 +1502,7 @@ lldb_private::Status ClangExpressionParser::DoPrepareForExecution( LLDB_LOGF(log, "%s - Current expression language is %s\n", __FUNCTION__, lang.GetDescription().data()); lldb::ProcessSP process_sp = exe_ctx.GetProcessSP(); - if (process_sp && lang != lldb::eLanguageTypeUnknown) { + if (process_sp && lang) { auto runtime = process_sp->GetLanguageRuntime(lang.AsLanguageType()); if (runtime) runtime->GetIRPasses(custom_passes); diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 2ed58c5331df4..95b515412d693 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -1344,18 +1344,18 @@ const char *StackFrame::GetDisplayFunctionName() { SourceLanguage StackFrame::GetLanguage() { CompileUnit *cu = GetSymbolContext(eSymbolContextCompUnit).comp_unit; if (cu) - return cu->GetLanguage(); + return SourceLanguage{cu->GetLanguage()}; return {}; } SourceLanguage StackFrame::GuessLanguage() { SourceLanguage lang_type = GetLanguage(); - if (lang_type == eLanguageTypeUnknown) { + if (!lang_type) { SymbolContext sc = GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol); if (sc.function) - lang_type = LanguageType(sc.function->GetMangled().GuessLanguage()); + lang_type = SourceLanguage(sc.function->GetMangled().GuessLanguage()); else if (sc.symbol) lang_type = SourceLanguage(sc.symbol->GetMangled().GuessLanguage()); } diff 
--git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index a23091ad09c6d..e53fc7a1e1bda 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -4945,7 +4945,7 @@ void TargetProperties::SetStandardErrorPath(llvm::StringRef path) { SourceLanguage TargetProperties::GetLanguage() const { const uint32_t idx = ePropertyLanguage; - return {GetPropertyAtIndexAs(idx, {})}; + return SourceLanguage{GetPropertyAtIndexAs(idx, {})}; } llvm::StringRef TargetProperties::GetExpressionPrefixContents() { From 056d2c12f75654b4b78c938a5243fa57efbd1547 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 5 Nov 2025 09:10:36 -0800 Subject: [PATCH 29/61] RuntimeLibcalls: Split lowering decisions into LibcallLoweringInfo (#164987) Introduce a new class for the TargetLowering usage. This tracks the subtarget specific lowering decisions for which libcall to use. RuntimeLibcallsInfo is a module level property, which may have multiple implementations of a particular libcall available. This attempts to be a minimum boilerplate patch to introduce the new concept. In the future we should have a tablegen way of selecting which implementations should be used for a subtarget. Currently we do have some conflicting implementations added, it just happens to work out that the default cases to prefer is alphabetically first (plus some of these still are using manual overrides in TargetLowering constructors). 
--- .../llvm/CodeGen/LibcallLoweringInfo.h | 66 ++++++++++++++++ llvm/include/llvm/CodeGen/TargetLowering.h | 19 +++-- llvm/include/llvm/IR/RuntimeLibcalls.h | 60 +++----------- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/LibcallLoweringInfo.cpp | 26 +++++++ llvm/lib/CodeGen/TargetLoweringBase.cpp | 8 +- llvm/lib/IR/RuntimeLibcalls.cpp | 1 + llvm/lib/LTO/LTO.cpp | 7 +- .../WebAssemblyRuntimeLibcallSignatures.cpp | 20 +++-- .../Utils/DeclareRuntimeLibcalls.cpp | 4 +- .../RuntimeLibcallEmitter-calling-conv.td | 60 +++++++------- .../RuntimeLibcallEmitter-conflict-warning.td | 30 +++---- llvm/test/TableGen/RuntimeLibcallEmitter.td | 78 +++++++++---------- .../Util/DeclareRuntimeLibcalls/basic.ll | 9 ++- .../DeclareRuntimeLibcalls/sincos_stret.ll | 6 +- .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 19 ++--- 16 files changed, 238 insertions(+), 176 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/LibcallLoweringInfo.h create mode 100644 llvm/lib/CodeGen/LibcallLoweringInfo.cpp diff --git a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h new file mode 100644 index 0000000000000..e8eceeed6aca6 --- /dev/null +++ b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h @@ -0,0 +1,66 @@ +//===- LibcallLoweringInfo.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/RuntimeLibcalls.h" + +namespace llvm { + +class LibcallLoweringInfo { +private: + LLVM_ABI const RTLIB::RuntimeLibcallsInfo &RTLCI; + /// Stores the implementation choice for each each libcall. 
+ LLVM_ABI RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { + RTLIB::Unsupported}; + +public: + LLVM_ABI LibcallLoweringInfo(const RTLIB::RuntimeLibcallsInfo &RTLCI); + + /// Get the libcall routine name for the specified libcall. + // FIXME: This should be removed. Only LibcallImpl should have a name. + LLVM_ABI const char *getLibcallName(RTLIB::Libcall Call) const { + // FIXME: Return StringRef + return RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpls[Call]) + .data(); + } + + /// Return the lowering's selection of implementation call for \p Call + LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const { + return LibcallImpls[Call]; + } + + /// Rename the default libcall routine name for the specified libcall. + LLVM_ABI void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl) { + LibcallImpls[Call] = Impl; + } + + // FIXME: Remove this wrapper in favor of directly using + // getLibcallImplCallingConv + LLVM_ABI CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { + return RTLCI.LibcallImplCallingConvs[LibcallImpls[Call]]; + } + + /// Get the CallingConv that should be used for the specified libcall. + LLVM_ABI CallingConv::ID + getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const { + return RTLCI.LibcallImplCallingConvs[Call]; + } + + /// Return a function impl compatible with RTLIB::MEMCPY, or + /// RTLIB::Unsupported if fully unsupported. + RTLIB::LibcallImpl getMemcpyImpl() const { + RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); + if (Memcpy == RTLIB::Unsupported) { + // Fallback to memmove if memcpy isn't available. 
+ return getLibcallImpl(RTLIB::MEMMOVE); + } + + return Memcpy; + } +}; + +} // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b229659415d55..2550c2bee5f71 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -29,6 +29,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/LibcallLoweringInfo.h" #include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcallUtil.h" @@ -3597,7 +3598,7 @@ class LLVM_ABI TargetLoweringBase { } const RTLIB::RuntimeLibcallsInfo &getRuntimeLibcallsInfo() const { - return Libcalls; + return RuntimeLibcallInfo; } void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl) { @@ -3610,9 +3611,9 @@ class LLVM_ABI TargetLoweringBase { } /// Get the libcall routine name for the specified libcall. + // FIXME: This should be removed. Only LibcallImpl should have a name. const char *getLibcallName(RTLIB::Libcall Call) const { - // FIXME: Return StringRef - return Libcalls.getLibcallName(Call).data(); + return Libcalls.getLibcallName(Call); } /// Get the libcall routine name for the specified libcall implementation @@ -3625,7 +3626,7 @@ class LLVM_ABI TargetLoweringBase { /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const { - return Libcalls.getSupportedLibcallImpl(FuncName); + return RuntimeLibcallInfo.getSupportedLibcallImpl(FuncName); } /// Get the comparison predicate that's to be used to test the result of the @@ -3633,11 +3634,6 @@ class LLVM_ABI TargetLoweringBase { /// floating-point compare libcalls. ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const; - /// Set the CallingConv that should be used for the specified libcall. 
- void setLibcallImplCallingConv(RTLIB::LibcallImpl Call, CallingConv::ID CC) { - Libcalls.setLibcallImplCallingConv(Call, CC); - } - /// Get the CallingConv that should be used for the specified libcall /// implementation. CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const { @@ -3834,8 +3830,11 @@ class LLVM_ABI TargetLoweringBase { std::map, MVT::SimpleValueType> PromoteToType; + /// FIXME: This should not live here; it should come from an analysis. + const RTLIB::RuntimeLibcallsInfo RuntimeLibcallInfo; + /// The list of libcalls that the target will use. - RTLIB::RuntimeLibcallsInfo Libcalls; + LibcallLoweringInfo Libcalls; /// The bits of IndexedModeActions used to store the legalisation actions /// We store the data as | ML | MS | L | S | each taking 4 bits. diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index bae760b3f981d..78e4b1723aafa 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -42,6 +42,8 @@ template <> struct enum_iteration_traits { static constexpr bool is_iterable = true; }; +class LibcallLoweringInfo; + namespace RTLIB { // Return an iterator over all Libcall values. @@ -70,6 +72,8 @@ struct RuntimeLibcallsInfo { LibcallImplBitset AvailableLibcallImpls; public: + friend class llvm::LibcallLoweringInfo; + explicit RuntimeLibcallsInfo( const Triple &TT, ExceptionHandling ExceptionModel = ExceptionHandling::None, @@ -85,17 +89,6 @@ struct RuntimeLibcallsInfo { initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName); } - /// Rename the default libcall routine name for the specified libcall. - void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl) { - LibcallImpls[Call] = Impl; - } - - /// Get the libcall routine name for the specified libcall. - // FIXME: This should be removed. Only LibcallImpl should have a name. 
- StringRef getLibcallName(RTLIB::Libcall Call) const { - return getLibcallImplName(LibcallImpls[Call]); - } - /// Get the libcall routine name for the specified libcall implementation. static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl) { if (CallImpl == RTLIB::Unsupported) @@ -105,42 +98,24 @@ struct RuntimeLibcallsInfo { RuntimeLibcallNameSizeTable[CallImpl]); } - /// Return the lowering's selection of implementation call for \p Call - RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const { - return LibcallImpls[Call]; - } - /// Set the CallingConv that should be used for the specified libcall /// implementation void setLibcallImplCallingConv(RTLIB::LibcallImpl Call, CallingConv::ID CC) { LibcallImplCallingConvs[Call] = CC; } - // FIXME: Remove this wrapper in favor of directly using - // getLibcallImplCallingConv - CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { - return LibcallImplCallingConvs[LibcallImpls[Call]]; - } - /// Get the CallingConv that should be used for the specified libcall. CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const { return LibcallImplCallingConvs[Call]; } - ArrayRef getLibcallImpls() const { - // Trim UNKNOWN_LIBCALL from the back - return ArrayRef(LibcallImpls).drop_back(); + /// Return the libcall provided by \p Impl + static RTLIB::Libcall getLibcallFromImpl(RTLIB::LibcallImpl Impl) { + return ImplToLibcall[Impl]; } - /// Return a function name compatible with RTLIB::MEMCPY, or nullptr if fully - /// unsupported. - RTLIB::LibcallImpl getMemcpyImpl() const { - RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); - if (Memcpy != RTLIB::Unsupported) - return Memcpy; - - // Fallback to memmove if memcpy isn't available. 
- return getLibcallImpl(RTLIB::MEMMOVE); + unsigned getNumAvailableLibcallImpls() const { + return AvailableLibcallImpls.count(); } bool isAvailable(RTLIB::LibcallImpl Impl) const { @@ -151,11 +126,6 @@ struct RuntimeLibcallsInfo { AvailableLibcallImpls.set(Impl); } - /// Return the libcall provided by \p Impl - static RTLIB::Libcall getLibcallFromImpl(RTLIB::LibcallImpl Impl) { - return ImplToLibcall[Impl]; - } - /// Check if a function name is a recognized runtime call of any kind. This /// does not consider if this call is available for any current compilation, /// just that it is a known call somewhere. This returns the set of all @@ -176,11 +146,8 @@ struct RuntimeLibcallsInfo { LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const { for (RTLIB::LibcallImpl Impl : lookupLibcallImplName(FuncName)) { - // FIXME: This should not depend on looking up ImplToLibcall, only the - // list of libcalls for the module. - RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; - if (Recognized != RTLIB::Unsupported) - return Recognized; + if (isAvailable(Impl)) + return Impl; } return RTLIB::Unsupported; @@ -197,10 +164,6 @@ struct RuntimeLibcallsInfo { LLVM_ABI static iota_range lookupLibcallImplNameImpl(StringRef Name); - /// Stores the implementation choice for each each libcall. 
- RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { - RTLIB::Unsupported}; - static_assert(static_cast(CallingConv::C) == 0, "default calling conv should be encoded as 0"); @@ -274,6 +237,7 @@ struct RuntimeLibcallsInfo { }; } // namespace RTLIB + } // namespace llvm #endif // LLVM_IR_RUNTIME_LIBCALLS_H diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 4373c5397a3c6..1cf0b4964760b 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -88,6 +88,7 @@ add_llvm_component_library(LLVMCodeGen LatencyPriorityQueue.cpp LazyMachineBlockFrequencyInfo.cpp LexicalScopes.cpp + LibcallLoweringInfo.cpp LiveDebugVariables.cpp LiveIntervals.cpp LiveInterval.cpp diff --git a/llvm/lib/CodeGen/LibcallLoweringInfo.cpp b/llvm/lib/CodeGen/LibcallLoweringInfo.cpp new file mode 100644 index 0000000000000..5c1698cb6060e --- /dev/null +++ b/llvm/lib/CodeGen/LibcallLoweringInfo.cpp @@ -0,0 +1,26 @@ +//===- LibcallLoweringInfo.cpp - Interface for runtime libcalls -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LibcallLoweringInfo.h" + +using namespace llvm; + +LibcallLoweringInfo::LibcallLoweringInfo( + const RTLIB::RuntimeLibcallsInfo &RTLCI) + : RTLCI(RTLCI) { + // TODO: This should be generated with lowering predicates, and assert the + // call is available. + for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (RTLCI.isAvailable(Impl)) { + RTLIB::Libcall LC = RTLIB::RuntimeLibcallsInfo::getLibcallFromImpl(Impl); + // FIXME: Hack, assume the first available libcall wins. 
+ if (LibcallImpls[LC] == RTLIB::Unsupported) + LibcallImpls[LC] = Impl; + } + } +} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index b3535eaca5e9d..1cc591c17f9c3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -697,9 +697,11 @@ ISD::CondCode TargetLoweringBase::getSoftFloatCmpLibcallPredicate( /// NOTE: The TargetMachine owns TLOF. TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), Libcalls(TM.getTargetTriple(), TM.Options.ExceptionModel, - TM.Options.FloatABIType, TM.Options.EABIVersion, - TM.Options.MCOptions.getABIName()) { + : TM(tm), + RuntimeLibcallInfo(TM.getTargetTriple(), TM.Options.ExceptionModel, + TM.Options.FloatABIType, TM.Options.EABIVersion, + TM.Options.MCOptions.getABIName()), + Libcalls(RuntimeLibcallInfo) { initActions(); // Perform these initializations only once. diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 2ce5719228a0d..2fb01a4f95fea 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -19,6 +19,7 @@ using namespace llvm; using namespace RTLIB; +#define GET_RUNTIME_LIBCALLS_INFO #define GET_INIT_RUNTIME_LIBCALL_NAMES #define GET_SET_TARGET_RUNTIME_LIBCALL_SETS #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 23be42f9d60ce..fefc733fa7697 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1396,11 +1396,10 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { RTLIB::RuntimeLibcallsInfo Libcalls(TT); SmallVector LibcallSymbols; - ArrayRef LibcallImpls = Libcalls.getLibcallImpls(); - LibcallSymbols.reserve(LibcallImpls.size()); + LibcallSymbols.reserve(Libcalls.getNumAvailableLibcallImpls()); - for (RTLIB::LibcallImpl Impl : LibcallImpls) { - if (Impl != RTLIB::Unsupported) + for (RTLIB::LibcallImpl Impl : 
RTLIB::libcall_impls()) { + if (Libcalls.isAvailable(Impl)) LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl).data()); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 45b0e7dc12263..f3c236ca8c9ce 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -532,13 +532,19 @@ struct StaticLibcallNameMap { // FIXME: This is broken if there are ever different triples compiled with // different libcalls. RTLIB::RuntimeLibcallsInfo RTCI(TT); - for (RTLIB::Libcall LC : RTLIB::libcalls()) { - StringRef NameLibcall = RTCI.getLibcallName(LC); - if (!NameLibcall.empty() && - getRuntimeLibcallSignatures().Table[LC] != unsupported) { - assert(!Map.contains(NameLibcall) && - "duplicate libcall names in name map"); - Map[NameLibcall] = LC; + + ArrayRef Table = + getRuntimeLibcallSignatures().Table; + for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (!RTCI.isAvailable(Impl)) + continue; + RTLIB::Libcall LC = RTLIB::RuntimeLibcallsInfo::getLibcallFromImpl(Impl); + if (Table[LC] != unsupported) { + StringRef NameLibcall = + RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Impl); + // FIXME: Map should be to LibcallImpl + if (!Map.insert({NameLibcall, LC}).second) + llvm_unreachable("duplicate libcall names in name map"); } } } diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp index 6d4436b92c119..dd8706cfb2855 100644 --- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp +++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp @@ -54,8 +54,8 @@ PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M, const DataLayout &DL = M.getDataLayout(); const Triple &TT = M.getTargetTriple(); - for (RTLIB::LibcallImpl Impl : RTLCI.getLibcallImpls()) { - if (Impl == RTLIB::Unsupported) + for 
(RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (!RTLCI.isAvailable(Impl)) continue; auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(Ctx, TT, DL, Impl); diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td index 2904474f6110b..e4a7126d79fbd 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td @@ -53,21 +53,21 @@ def MSP430LibraryWithCondCC : SystemRuntimeLibrary; // CHECK-NEXT: AvailableLibcallImpls = SystemAvailableImpls; // CHECK-EMPTY: -// CHECK-NEXT: static const LibcallImplPair LibraryCalls[] = { -// CHECK-NEXT: {RTLIB::SOME_FUNC, RTLIB::impl_func_b}, // func_b +// CHECK-NEXT: static const RTLIB::LibcallImpl LibraryCalls[] = { +// CHECK-NEXT: RTLIB::impl_func_b, // func_b // CHECK-NEXT: }; // CHECK-EMPTY: -// CHECK-NEXT: for (const auto [Func, Impl] : LibraryCalls) { -// CHECK-NEXT: setLibcallImpl(Func, Impl); +// CHECK-NEXT: for (const RTLIB::LibcallImpl Impl : LibraryCalls) { +// CHECK-NEXT: setAvailable(Impl); // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: return; @@ -53,13 +53,13 @@ def TheSystemLibraryA : SystemRuntimeLibrary @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] ; X64: declare { double, double } @__sincos_stret(double) [[SINCOS_ATTRS:#[0-9]+]] +; X64: declare <2 x float> @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] -; STRUCT: declare { float, float } @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] ; STRUCT: declare { double, double } @__sincos_stret(double) [[SINCOS_ATTRS:#[0-9]+]] +; STRUCT: declare { float, float } @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] -; SRET: declare void @__sincosf_stret(ptr sret({ float, float }) align 4, float) [[SINCOS_ATTRS:#[0-9]+]] ; SRET: declare void @__sincos_stret(ptr sret({ double, double }) align 4, double) [[SINCOS_ATTRS:#[0-9]+]] +; SRET: declare void @__sincosf_stret(ptr sret({ float, float }) align 4, float) 
[[SINCOS_ATTRS:#[0-9]+]] ; CHECK: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(errnomem: write) } ; SRET: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write, errnomem: write) } diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 6a36f471678bf..001ca7b658d3c 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -544,11 +544,8 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( OS << "void llvm::RTLIB::RuntimeLibcallsInfo::setTargetRuntimeLibcallSets(" "const llvm::Triple &TT, ExceptionHandling ExceptionModel, " "FloatABI::ABIType FloatABI, EABI EABIVersion, " - "StringRef ABIName) {\n" - " struct LibcallImplPair {\n" - " RTLIB::Libcall Func;\n" - " RTLIB::LibcallImpl Impl;\n" - " };\n"; + "StringRef ABIName) {\n"; + ArrayRef AllLibs = Records.getAllDerivedDefinitions("SystemRuntimeLibrary"); @@ -703,7 +700,7 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( Funcs.erase(UniqueI, Funcs.end()); OS << indent(IndentDepth + 2) - << "static const LibcallImplPair LibraryCalls"; + << "static const RTLIB::LibcallImpl LibraryCalls"; SubsetPredicate.emitTableVariableNameSuffix(OS); if (FuncsWithCC.CallingConv) OS << '_' << FuncsWithCC.CallingConv->getName(); @@ -711,18 +708,18 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( OS << "[] = {\n"; for (const RuntimeLibcallImpl *LibCallImpl : Funcs) { OS << indent(IndentDepth + 6); - LibCallImpl->emitTableEntry(OS); + LibCallImpl->emitEnumEntry(OS); + OS << ", // " << LibCallImpl->getLibcallFuncName() << '\n'; } OS << indent(IndentDepth + 2) << "};\n\n" << indent(IndentDepth + 2) - << "for (const auto [Func, Impl] : LibraryCalls"; + << "for (const RTLIB::LibcallImpl Impl : LibraryCalls"; SubsetPredicate.emitTableVariableNameSuffix(OS); if 
(FuncsWithCC.CallingConv) OS << '_' << FuncsWithCC.CallingConv->getName(); - OS << ") {\n" - << indent(IndentDepth + 4) << "setLibcallImpl(Func, Impl);\n"; + OS << ") {\n" << indent(IndentDepth + 4) << "setAvailable(Impl);\n"; if (FuncsWithCC.CallingConv) { StringRef CCEnum = @@ -759,7 +756,7 @@ void RuntimeLibcallEmitter::run(raw_ostream &OS) { emitGetInitRuntimeLibcallNames(OS); { - IfDefEmitter IfDef(OS, "GET_SET_TARGET_RUNTIME_LIBCALL_SETS"); + IfDefEmitter IfDef(OS, "GET_RUNTIME_LIBCALLS_INFO"); emitSystemRuntimeLibrarySetCalls(OS); } } From dd8892300e7279e4b3ea5e085defe14d4849626f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 5 Nov 2025 09:17:58 -0800 Subject: [PATCH 30/61] RuntimeLibcalls: Remove LibcallLoweringPredicate from RuntimeLibcallImpl (#166585) This is unused and will not make sense. --- llvm/include/llvm/IR/RuntimeLibcallsImpl.td | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcallsImpl.td b/llvm/include/llvm/IR/RuntimeLibcallsImpl.td index b5752c1b69ad8..92853125379f5 100644 --- a/llvm/include/llvm/IR/RuntimeLibcallsImpl.td +++ b/llvm/include/llvm/IR/RuntimeLibcallsImpl.td @@ -61,7 +61,6 @@ class RuntimeLibcall { class RuntimeLibcallImpl { RuntimeLibcall Provides = P; string LibCallFuncName = Name; - list LoweringPredicates; bit IsDefault = false; } From 6312d2751144bd53af7ef56798cbe60aa8b2fb56 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Wed, 5 Nov 2025 09:18:49 -0800 Subject: [PATCH 31/61] [DirectX] Emit `hlsl.wavesize` function attribute as entry property metadata (#165624) This pr adds support for emitting the `hlsl.wavesize` function attribute as an entry property metadata for a compute shader. It follows the implementation of `hlsl.numthreads`. 
- Collects the wave range information from the function attribute in `DXILMetadataAnalysis` - Introduce the `WaveRange` property tag - Emit a `WaveSize` or `WaveRange` metadata (depending on shader model) in `DXILTranslateMetadata` - Add tests for valid/invalid scenarios - Updates the base `PSVInfo` to reflect the min/max wave lane counts Resolves #70118 --- .../llvm/Analysis/DXILMetadataAnalysis.h | 3 + llvm/lib/Analysis/DXILMetadataAnalysis.cpp | 16 ++++ .../lib/Target/DirectX/DXContainerGlobals.cpp | 7 ++ .../Target/DirectX/DXILTranslateMetadata.cpp | 66 ++++++++++--- llvm/test/CodeGen/DirectX/wavesize-md-errs.ll | 31 ++++++ .../test/CodeGen/DirectX/wavesize-md-valid.ll | 96 +++++++++++++++++++ 6 files changed, 206 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/wavesize-md-errs.ll create mode 100644 llvm/test/CodeGen/DirectX/wavesize-md-valid.ll diff --git a/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h b/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h index cb535ac14f1c6..a1b030c157eae 100644 --- a/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h +++ b/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h @@ -27,6 +27,9 @@ struct EntryProperties { unsigned NumThreadsX{0}; // X component unsigned NumThreadsY{0}; // Y component unsigned NumThreadsZ{0}; // Z component + unsigned WaveSizeMin{0}; // Minimum component + unsigned WaveSizeMax{0}; // Maximum component + unsigned WaveSizePref{0}; // Preferred component EntryProperties(const Function *Fn = nullptr) : Entry(Fn) {}; }; diff --git a/llvm/lib/Analysis/DXILMetadataAnalysis.cpp b/llvm/lib/Analysis/DXILMetadataAnalysis.cpp index 23f1aa82ae8a3..bd77cba385667 100644 --- a/llvm/lib/Analysis/DXILMetadataAnalysis.cpp +++ b/llvm/lib/Analysis/DXILMetadataAnalysis.cpp @@ -66,6 +66,22 @@ static ModuleMetadataInfo collectMetadataInfo(Module &M) { Success = llvm::to_integer(NumThreadsVec[2], EFP.NumThreadsZ, 10); assert(Success && "Failed to parse Z component of numthreads"); } + // Get 
wavesize attribute value, if one exists + StringRef WaveSizeStr = + F.getFnAttribute("hlsl.wavesize").getValueAsString(); + if (!WaveSizeStr.empty()) { + SmallVector WaveSizeVec; + WaveSizeStr.split(WaveSizeVec, ','); + assert(WaveSizeVec.size() == 3 && "Invalid wavesize specified"); + // Read in the three component values of wavesize + [[maybe_unused]] bool Success = + llvm::to_integer(WaveSizeVec[0], EFP.WaveSizeMin, 10); + assert(Success && "Failed to parse Min component of wavesize"); + Success = llvm::to_integer(WaveSizeVec[1], EFP.WaveSizeMax, 10); + assert(Success && "Failed to parse Max component of wavesize"); + Success = llvm::to_integer(WaveSizeVec[2], EFP.WaveSizePref, 10); + assert(Success && "Failed to parse Preferred component of wavesize"); + } MMDAI.EntryPropertyVec.push_back(EFP); } return MMDAI; diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index eb4c8846441a2..677203d1c016b 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -285,6 +285,13 @@ void DXContainerGlobals::addPipelineStateValidationInfo( PSV.BaseData.NumThreadsX = MMI.EntryPropertyVec[0].NumThreadsX; PSV.BaseData.NumThreadsY = MMI.EntryPropertyVec[0].NumThreadsY; PSV.BaseData.NumThreadsZ = MMI.EntryPropertyVec[0].NumThreadsZ; + if (MMI.EntryPropertyVec[0].WaveSizeMin) { + PSV.BaseData.MinimumWaveLaneCount = MMI.EntryPropertyVec[0].WaveSizeMin; + PSV.BaseData.MaximumWaveLaneCount = + MMI.EntryPropertyVec[0].WaveSizeMax + ?
MMI.EntryPropertyVec[0].WaveSizeMax + : MMI.EntryPropertyVec[0].WaveSizeMin; + } break; default: break; diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index cf8b833b3e42e..e1a472fe57642 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -82,6 +82,7 @@ enum class EntryPropsTag { ASStateTag, WaveSize, EntryRootSig, + WaveRange = 23, }; } // namespace @@ -177,14 +178,15 @@ getTagValueAsMetadata(EntryPropsTag Tag, uint64_t Value, LLVMContext &Ctx) { case EntryPropsTag::ASStateTag: case EntryPropsTag::WaveSize: case EntryPropsTag::EntryRootSig: + case EntryPropsTag::WaveRange: llvm_unreachable("NYI: Unhandled entry property tag"); } return MDVals; } -static MDTuple * -getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, - const Triple::EnvironmentType ShaderProfile) { +static MDTuple *getEntryPropAsMetadata(Module &M, const EntryProperties &EP, + uint64_t EntryShaderFlags, + const ModuleMetadataInfo &MMDI) { SmallVector MDVals; LLVMContext &Ctx = EP.Entry->getContext(); if (EntryShaderFlags != 0) @@ -195,12 +197,13 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, // FIXME: support more props. // See https://github.com/llvm/llvm-project/issues/57948. // Add shader kind for lib entries. 
- if (ShaderProfile == Triple::EnvironmentType::Library && + if (MMDI.ShaderProfile == Triple::EnvironmentType::Library && EP.ShaderStage != Triple::EnvironmentType::Library) MDVals.append(getTagValueAsMetadata(EntryPropsTag::ShaderKind, getShaderStage(EP.ShaderStage), Ctx)); if (EP.ShaderStage == Triple::EnvironmentType::Compute) { + // Handle mandatory "hlsl.numthreads" MDVals.emplace_back(ConstantAsMetadata::get(ConstantInt::get( Type::getInt32Ty(Ctx), static_cast(EntryPropsTag::NumThreads)))); Metadata *NumThreadVals[] = {ConstantAsMetadata::get(ConstantInt::get( @@ -210,8 +213,48 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, ConstantAsMetadata::get(ConstantInt::get( Type::getInt32Ty(Ctx), EP.NumThreadsZ))}; MDVals.emplace_back(MDNode::get(Ctx, NumThreadVals)); + + // Handle optional "hlsl.wavesize". The fields are optionally represented + // if they are non-zero. + if (EP.WaveSizeMin != 0) { + bool IsWaveRange = VersionTuple(6, 8) <= MMDI.ShaderModelVersion; + bool IsWaveSize = + !IsWaveRange && VersionTuple(6, 6) <= MMDI.ShaderModelVersion; + + if (!IsWaveRange && !IsWaveSize) { + reportError(M, "Shader model 6.6 or greater is required to specify " + "the \"hlsl.wavesize\" function attribute"); + return nullptr; + } + + // A range is being specified if EP.WaveSizeMax != 0 + if (EP.WaveSizeMax && !IsWaveRange) { + reportError( + M, "Shader model 6.8 or greater is required to specify " + "wave size range values of the \"hlsl.wavesize\" function " + "attribute"); + return nullptr; + } + + EntryPropsTag Tag = + IsWaveSize ? 
EntryPropsTag::WaveSize : EntryPropsTag::WaveRange; + MDVals.emplace_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), static_cast(Tag)))); + + SmallVector WaveSizeVals = {ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMin))}; + if (IsWaveRange) { + WaveSizeVals.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMax))); + WaveSizeVals.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizePref))); + } + + MDVals.emplace_back(MDNode::get(Ctx, WaveSizeVals)); + } } } + if (MDVals.empty()) return nullptr; return MDNode::get(Ctx, MDVals); @@ -236,12 +279,11 @@ static MDTuple *constructEntryMetadata(const Function *EntryFn, return MDNode::get(Ctx, MDVals); } -static MDTuple *emitEntryMD(const EntryProperties &EP, MDTuple *Signatures, - MDNode *MDResources, +static MDTuple *emitEntryMD(Module &M, const EntryProperties &EP, + MDTuple *Signatures, MDNode *MDResources, const uint64_t EntryShaderFlags, - const Triple::EnvironmentType ShaderProfile) { - MDTuple *Properties = - getEntryPropAsMetadata(EP, EntryShaderFlags, ShaderProfile); + const ModuleMetadataInfo &MMDI) { + MDTuple *Properties = getEntryPropAsMetadata(M, EP, EntryShaderFlags, MMDI); return constructEntryMetadata(EP.Entry, Signatures, MDResources, Properties, EP.Entry->getContext()); } @@ -523,10 +565,8 @@ static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM, Twine(Triple::getEnvironmentTypeName(MMDI.ShaderProfile) + "'")); } - - EntryFnMDNodes.emplace_back(emitEntryMD(EntryProp, Signatures, ResourceMD, - EntryShaderFlags, - MMDI.ShaderProfile)); + EntryFnMDNodes.emplace_back(emitEntryMD( + M, EntryProp, Signatures, ResourceMD, EntryShaderFlags, MMDI)); } NamedMDNode *EntryPointsNamedMD = diff --git a/llvm/test/CodeGen/DirectX/wavesize-md-errs.ll b/llvm/test/CodeGen/DirectX/wavesize-md-errs.ll new file mode 100644 index 0000000000000..9016c5d7e8d44 --- /dev/null +++ 
b/llvm/test/CodeGen/DirectX/wavesize-md-errs.ll @@ -0,0 +1,31 @@ +; RUN: split-file %s %t +; RUN: not opt -S --dxil-translate-metadata %t/low-sm.ll 2>&1 | FileCheck %t/low-sm.ll +; RUN: not opt -S --dxil-translate-metadata %t/low-sm-for-range.ll 2>&1 | FileCheck %t/low-sm-for-range.ll + +; Test that wavesize metadata is only allowed on applicable shader model versions + +;--- low-sm.ll + +; CHECK: Shader model 6.6 or greater is required to specify the "hlsl.wavesize" function attribute + +target triple = "dxil-unknown-shadermodel6.5-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,0,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- low-sm-for-range.ll + +; CHECK: Shader model 6.8 or greater is required to specify wave size range values of the "hlsl.wavesize" function attribute + +target triple = "dxil-unknown-shadermodel6.7-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,32,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/DirectX/wavesize-md-valid.ll b/llvm/test/CodeGen/DirectX/wavesize-md-valid.ll new file mode 100644 index 0000000000000..3ad6c1d034252 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/wavesize-md-valid.ll @@ -0,0 +1,96 @@ +; RUN: split-file %s %t +; RUN: opt -S --dxil-translate-metadata %t/only.ll | FileCheck %t/only.ll +; RUN: opt -S --dxil-translate-metadata %t/min.ll | FileCheck %t/min.ll +; RUN: opt -S --dxil-translate-metadata %t/max.ll | FileCheck %t/max.ll +; RUN: opt -S --dxil-translate-metadata %t/pref.ll | FileCheck %t/pref.ll + +; RUN: llc --filetype=obj %t/only.ll -o - | obj2yaml | FileCheck %t/only.ll --check-prefix=OBJ +; RUN: llc --filetype=obj %t/min.ll -o - | obj2yaml | FileCheck %t/min.ll --check-prefix=OBJ +; RUN: llc --filetype=obj %t/max.ll -o - | obj2yaml | FileCheck %t/max.ll --check-prefix=OBJ +; RUN: llc --filetype=obj %t/pref.ll -o - | obj2yaml | FileCheck %t/pref.ll 
--check-prefix=OBJ + +; Test that wave size/range metadata is correctly generated with the correct tag + +;--- only.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 11, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 16 + +target triple = "dxil-unknown-shadermodel6.6-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,0,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- min.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 23, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16, i32 0, i32 0} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 16 + +target triple = "dxil-unknown-shadermodel6.8-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,0,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- max.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 23, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16, i32 32, i32 0} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 32 + +target triple = "dxil-unknown-shadermodel6.8-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,32,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- pref.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 23, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = 
!{i32 16, i32 64, i32 32} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 64 + +target triple = "dxil-unknown-shadermodel6.8-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,64,32" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } From a1f0fe140a0e8f15dcd33df42b2c8cf170f69db8 Mon Sep 17 00:00:00 2001 From: nerix Date: Wed, 5 Nov 2025 18:19:43 +0100 Subject: [PATCH 32/61] [MsDemangle] Use NodeList over SmallVector for target names (#166586) Using `SmallVector` would introduce a dependency cycle (see https://github.com/llvm/llvm-project/pull/155630#discussion_r2495268497), so this uses a NodeList. --- llvm/lib/Demangle/MicrosoftDemangle.cpp | 50 ++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index 250d382998982..0aefe6e077c24 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -15,8 +15,6 @@ #include "llvm/Demangle/MicrosoftDemangle.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/MicrosoftDemangleNodes.h" @@ -279,13 +277,16 @@ demanglePointerCVQualifiers(std::string_view &MangledName) { DEMANGLE_UNREACHABLE; } -static NodeArrayNode *smallVecToNodeArray(ArenaAllocator &Arena, - ArrayRef Vec) { - NodeArrayNode *Arr = Arena.alloc(); - Arr->Count = Vec.size(); - Arr->Nodes = Arena.allocArray(Vec.size()); - std::memcpy(Arr->Nodes, Vec.data(), Vec.size() * sizeof(Node *)); - return Arr; +static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, + size_t Count) { + NodeArrayNode *N = Arena.alloc(); + N->Count = Count; + N->Nodes = Arena.allocArray(Count); + for (size_t I = 0; I < Count; ++I) { + N->Nodes[I] = Head->N; + Head = Head->Next; + } + return 
N; } std::string_view Demangler::copyString(std::string_view Borrowed) { @@ -335,17 +336,28 @@ Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName, std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); - SmallVector TargetNames; + NodeList *TargetCurrent = nullptr; + NodeList *TargetHead = nullptr; + size_t Count = 0; while (!consumeFront(MangledName, '@')) { + ++Count; + + NodeList *Next = Arena.alloc(); + if (TargetCurrent) + TargetCurrent->Next = Next; + else + TargetHead = Next; + + TargetCurrent = Next; QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName); if (Error) return nullptr; assert(QN); - TargetNames.push_back(QN); + TargetCurrent->N = QN; } - if (!TargetNames.empty()) - STSN->TargetNames = smallVecToNodeArray(Arena, TargetNames); + if (Count > 0) + STSN->TargetNames = nodeListToNodeArray(Arena, TargetHead, Count); return STSN; } @@ -1627,18 +1639,6 @@ Demangler::demangleNameScopePiece(std::string_view &MangledName) { return demangleSimpleName(MangledName, /*Memorize=*/true); } -static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, - size_t Count) { - NodeArrayNode *N = Arena.alloc(); - N->Count = Count; - N->Nodes = Arena.allocArray(Count); - for (size_t I = 0; I < Count; ++I) { - N->Nodes[I] = Head->N; - Head = Head->Next; - } - return N; -} - QualifiedNameNode * Demangler::demangleNameScopeChain(std::string_view &MangledName, IdentifierNode *UnqualifiedName) { From 9564b26f81f481f91299ebc446011ed4e5407400 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 5 Nov 2025 09:20:10 -0800 Subject: [PATCH 33/61] [lldb] Support a Stable ABI LLDB_PYTHON_EXT_SUFFIX (#166269) When building against the Python Stable API, we should use the `abi3` ABI tag. Otherwise, Python will refuse to import the native shared object. This PR adds support for generating a stable ABI compatible suffix when `LLDB_ENABLE_PYTHON_LIMITED_API` is set. 
Previously, on Darwin when building against Python 3.14, you would end up with `_lldb.cpython-314-darwin.so`. Now, when using the stable ABI, you get `_lldb.abi3.so` instead. A different version of the Python interpreter will not consider loading the former, but will load the latter. --- lldb/CMakeLists.txt | 5 +++++ lldb/bindings/python/get-python-config.py | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 01b5546fee00d..0736e6ba132c8 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -62,11 +62,16 @@ if (LLDB_ENABLE_PYTHON) set(cachestring_LLDB_PYTHON_EXT_SUFFIX "Filename extension for native code python modules") + if (LLDB_ENABLE_PYTHON_LIMITED_API) + set(stable_abi "--stable-abi") + endif() + foreach(var LLDB_PYTHON_RELATIVE_PATH LLDB_PYTHON_EXE_RELATIVE_PATH LLDB_PYTHON_EXT_SUFFIX) if(NOT DEFINED ${var} AND NOT CMAKE_CROSSCOMPILING) execute_process( COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/bindings/python/get-python-config.py + ${stable_abi} ${var} OUTPUT_VARIABLE value OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/lldb/bindings/python/get-python-config.py b/lldb/bindings/python/get-python-config.py index ae84cbb1215a9..bf8cc48b013e1 100755 --- a/lldb/bindings/python/get-python-config.py +++ b/lldb/bindings/python/get-python-config.py @@ -18,6 +18,9 @@ def relpath_nodots(path, base): def main(): parser = argparse.ArgumentParser(description="extract cmake variables from python") parser.add_argument("variable_name") + parser.add_argument( + "--stable-abi", action="store_true", help="Target the Stable C ABI" + ) args = parser.parse_args() if args.variable_name == "LLDB_PYTHON_RELATIVE_PATH": # LLDB_PYTHON_RELATIVE_PATH is the relative path from lldb's prefix @@ -68,7 +71,10 @@ def main(): print("sys.prefix:", sys.prefix, file=sys.stderr) sys.exit(1) elif args.variable_name == "LLDB_PYTHON_EXT_SUFFIX": - print(sysconfig.get_config_var("EXT_SUFFIX")) + if 
args.stable_abi: + print(".abi3%s" % sysconfig.get_config_var("SHLIB_SUFFIX")) + else: + print(sysconfig.get_config_var("EXT_SUFFIX")) else: parser.error(f"unknown variable {args.variable_name}") From f60e69315e9ed94b2b330acb39a766ac86aa1f80 Mon Sep 17 00:00:00 2001 From: Prabhu Rajasekaran Date: Wed, 5 Nov 2025 09:22:08 -0800 Subject: [PATCH 34/61] [llvm] Emit canonical linkage correct function symbol (#166487) In the call graph section, we were emitting the temporary label pointing to the start of the function instead of the canonical linkage correct function symbol. This patch fixes it and updates the corresponding tests. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 +-- llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll | 3 +-- llvm/test/CodeGen/ARM/call-graph-section-assembly.ll | 3 +-- llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll | 2 +- llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll | 3 +-- llvm/test/CodeGen/X86/call-graph-section-assembly.ll | 3 +-- 6 files changed, 6 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f65d88a669f13..713277d0bc5e0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1708,7 +1708,6 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, OutStreamer->pushSection(); OutStreamer->switchSection(FuncCGSection); - const MCSymbol *FunctionSymbol = getFunctionBegin(); const Function &F = MF.getFunction(); // If this function has external linkage or has its address taken and // it is not a callback, then anything could call it. @@ -1747,7 +1746,7 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, // 8) Each unique indirect target type id. 
OutStreamer->emitInt8(CallGraphSectionFormatVersion::V_0); OutStreamer->emitInt8(static_cast(CGFlags)); - OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + OutStreamer->emitSymbolValue(getSymbol(&F), TM.getProgramPointerSize()); const auto *TypeId = extractNumericCGTypeId(F); if (IsIndirectTarget && TypeId) OutStreamer->emitInt64(TypeId->getZExtValue()); diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll index cabd43edff9d6..9e243aec1128d 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll @@ -14,7 +14,6 @@ entry: } ; CHECK: _ZL10myCallbacki: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define internal void @_ZL10myCallbacki(i32 %value) !type !2 { entry: %sink = alloca i32, align 4 @@ -33,7 +32,7 @@ entry: ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. ; CHECK-NEXT: .byte 1 ;; Function Entry PC -; CHECK-NEXT: .long [[LABEL_FUNC]] +; CHECK-NEXT: .long _ZL10myCallbacki ;; Function type ID -5212364466660467813 ; CHECK-NEXT: .long 1154849691 ; CHECK-NEXT: .long 3081369122 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll index 3d3974ee6ba3b..8e8881ee722fb 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll @@ -11,7 +11,6 @@ declare !type !1 i32 @direct_bar(i8) declare !type !2 ptr @direct_baz(ptr) ; CHECK: ball: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define ptr @ball() { entry: call void @direct_foo() @@ -42,7 +41,7 @@ entry: ;; Flags ; CHECK-NEXT: .byte 7 ;; Function Entry PC -; CHECK-NEXT: .long [[LABEL_FUNC]] +; CHECK-NEXT: .long ball ;; Function type ID -- set to 0 as no type metadata attached to function. 
; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll index 80360041c106a..35e570bdde405 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll @@ -29,6 +29,6 @@ declare !type !2 i32 @bar(i8 signext) ; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154 -; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8 +; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05000000 00a150b8 ;; Verify that the type id 0x308e4b8159bc8654 is in section. ; CHECK-NEXT: 0x00000020 3e0cfe3c b2015486 bc59814b 8e30 diff --git a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll index f36baba402421..ab8498d8d3451 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll @@ -14,7 +14,6 @@ entry: } ; CHECK: _ZL10myCallbacki: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define internal void @_ZL10myCallbacki(i32 %value) !type !2 { entry: %sink = alloca i32, align 4 @@ -33,6 +32,6 @@ entry: ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. 
; CHECK-NEXT: .byte 1 ;; Function Entry PC -; CHECK-NEXT: .quad [[LABEL_FUNC]] +; CHECK-NEXT: .quad _ZL10myCallbacki ;; Function type ID ; CHECK-NEXT: .quad -5212364466660467813 diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll index cdbad668aec54..02d71073b65c5 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll @@ -11,7 +11,6 @@ declare !type !1 i32 @direct_bar(i8) declare !type !2 ptr @direct_baz(ptr) ; CHECK: ball: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define ptr @ball() { entry: call void @direct_foo() @@ -42,7 +41,7 @@ entry: ;; Flags ; CHECK-NEXT: .byte 7 ;; Function Entry PC -; CHECK-NEXT: .quad [[LABEL_FUNC]] +; CHECK-NEXT: .quad ball ;; Function type ID -- set to 0 as no type metadata attached to function. ; CHECK-NEXT: .quad 0 ;; Number of unique direct callees. From cb41408d3c2de0d79b9c4c39ed2a8639906bc572 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Wed, 5 Nov 2025 20:27:11 +0300 Subject: [PATCH 35/61] [Xtensa] Fix S32C1I instruction encoding and copyPhysReg. (#165174) Fix S32C1I instruction encoding. Fix special registers parsing for S32C1I feature. Fix copyPhysReg function for f32 registers copy.
--- .../MCTargetDesc/XtensaMCCodeEmitter.cpp | 2 +- .../MCTargetDesc/XtensaMCTargetDesc.cpp | 2 +- llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp | 21 +++++++++++++++++-- llvm/test/CodeGen/Xtensa/s32c1i.ll | 7 +++++++ llvm/test/MC/Xtensa/s32c1i.s | 13 ++++++++++++ 5 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/s32c1i.ll create mode 100644 llvm/test/MC/Xtensa/s32c1i.s diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp index bd4d4ebd2a729..5977a276b1236 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp @@ -320,7 +320,7 @@ XtensaMCCodeEmitter::getMemRegEncoding(const MCInst &MI, unsigned OpNo, case Xtensa::SSIP: case Xtensa::LSI: case Xtensa::LSIP: - + case Xtensa::S32C1I: if (Res & 0x3) { report_fatal_error("Unexpected operand value!"); } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp index 4e730707dcb78..8d0fd078b2696 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp @@ -202,7 +202,7 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits, return FeatureBits[Xtensa::FeatureWindowed]; case Xtensa::ATOMCTL: case Xtensa::SCOMPARE1: - return FeatureBits[Xtensa::FeatureWindowed]; + return FeatureBits[Xtensa::FeatureS32C1I]; case Xtensa::NoRegister: return false; } diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp index b0f924f2cd58e..be69cefb5b78f 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp @@ -114,14 +114,31 @@ void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool 
RenamableDest, bool RenamableSrc) const { - // The MOV instruction is not present in core ISA, + unsigned Opcode; + + // The MOV instruction is not present in core ISA for AR registers, // so use OR instruction. - if (Xtensa::ARRegClass.contains(DestReg, SrcReg)) + if (Xtensa::ARRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MBBI, DL, get(Xtensa::OR), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (STI.hasSingleFloat() && Xtensa::FPRRegClass.contains(SrcReg) && + Xtensa::FPRRegClass.contains(DestReg)) + Opcode = Xtensa::MOV_S; + else if (STI.hasSingleFloat() && Xtensa::FPRRegClass.contains(SrcReg) && + Xtensa::ARRegClass.contains(DestReg)) + Opcode = Xtensa::RFR; + else if (STI.hasSingleFloat() && Xtensa::ARRegClass.contains(SrcReg) && + Xtensa::FPRRegClass.contains(DestReg)) + Opcode = Xtensa::WFR; else report_fatal_error("Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void XtensaInstrInfo::storeRegToStackSlot( diff --git a/llvm/test/CodeGen/Xtensa/s32c1i.ll b/llvm/test/CodeGen/Xtensa/s32c1i.ll new file mode 100644 index 0000000000000..aad738abe6a4c --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/s32c1i.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=xtensa -mattr=+s32c1i -filetype=obj %s -o - | llvm-objdump --arch=xtensa --mattr=s32c1i -d - | FileCheck %s -check-prefix=XTENSA + +define i32 @constraint_i(i32 %a) { +; XTENSA: 0: 22 e2 01 s32c1i a2, a2, 4 + %res = tail call i32 asm "s32c1i $0, $1, $2", "=r,r,i"(i32 %a, i32 4) + ret i32 %res +} diff --git a/llvm/test/MC/Xtensa/s32c1i.s b/llvm/test/MC/Xtensa/s32c1i.s new file mode 100644 index 0000000000000..218a86dd56752 --- /dev/null +++ b/llvm/test/MC/Xtensa/s32c1i.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc %s -triple=xtensa -show-encoding --mattr=+s32c1i \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s + +.align 4 +LBL0: + +# CHECK-INST: xsr a3, atomctl +# CHECK: # encoding: 
[0x30,0x63,0x61] +xsr a3, atomctl + +# CHECK-INST: xsr a3, scompare1 +# CHECK: # encoding: [0x30,0x0c,0x61] +xsr a3, scompare1 From efa7ab06ebf7e88a3cf1befebf58f24f8b09fcec Mon Sep 17 00:00:00 2001 From: Jin Huang Date: Wed, 5 Nov 2025 09:33:09 -0800 Subject: [PATCH 36/61] [profcheck] Add unknown branch weights to expanded cmpxchg loop. (#165841) The AtomicExpandPass is responsible for lowering high-level atomic operations (like `atomicrmw fadd`) that are unsupported by the target hardware into a cmpxchg retry loop. Given that we cannot empirically prove the precise branch weights, it uses the `setExplicitlyUnknownBranchWeightsIfProfiled` function to explicitly add "unknown" (50/50) branch weights to this branch. This PR includes fixes for the following tests: ``` Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll Transforms/AtomicExpand/AArch64/pcsections.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-nand.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-agent.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll Transforms/AtomicExpand/ARM/atomicrmw-fp.ll Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll Transforms/AtomicExpand/Mips/atomicrmw-fp.ll Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll Transforms/AtomicExpand/SPARC/libcalls.ll Transforms/AtomicExpand/X86/expand-atomic-rmw-fp.ll Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll ``` Co-authored-by:
Jin Huang --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 7 ++++++- .../AtomicExpand/AArch64/atomicrmw-fp.ll | 14 +++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 0b55c03a46747..bd62b1646c761 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1686,7 +1686,12 @@ Value *AtomicExpandImpl::insertRMWCmpXchgLoop( Loaded->addIncoming(NewLoaded, LoopBB); - Builder.CreateCondBr(Success, ExitBB, LoopBB); + Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB); + + // Atomic RMW expands to a cmpxchg loop, Since precise branch weights + // cannot be easily determined here, we mark the branch as "unknown" (50/50) + // to prevent misleading optimizations. + setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return NewLoaded; diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll index 8ffacb9bdd5f6..1b728f56ab2ea 100644 --- a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll @@ -1,7 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -mtriple=aarch64-linux-gnu -passes=atomic-expand %s | FileCheck %s -define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) { +define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) !prof !0 { ; CHECK-LABEL: @test_atomicrmw_fadd_f32( ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] @@ -14,7 +14,7 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) { ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 ; CHECK-NEXT: 
[[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 ; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float -; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]], !prof [[PROF1:![0-9]+]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: ret float [[TMP5]] ; @@ -336,3 +336,11 @@ define <2 x half> @atomicrmw_fminimum_2_x_half(ptr %ptr, <2 x half> %val) { %res = atomicrmw fminimum ptr %ptr, <2 x half> %val seq_cst ret <2 x half> %res } + +!0 = !{!"function_entry_count", i64 1000} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"unknown", !"atomic-expand"} +;. From d49c6707d07389e4bccdb23951dc9d3bc20996b1 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 5 Nov 2025 17:48:47 +0000 Subject: [PATCH 37/61] [libcxxabi][demangle] Fix the cp-to-llvm.sh sync script to copy all headers (#166572) In https://github.com/llvm/llvm-project/pull/137947 I refactored the script and added a `copy_files` function, which takes the header files to copy as an argument. But because the list of headers is a space separated string, we need to quote the string. Otherwise we would just copy the first header in the list. This patch also adds an `echo` statement in the `copy_files` loop to print the source/destination. Confirming that the files are copied as expected. ``` $ libcxxabi/src/demangle/cp-to-llvm.sh This will overwrite the copies of ItaniumDemangle.h ItaniumNodes.def StringViewExtras.h Utility.h in ../../../llvm/include/llvm/Demangle and DemangleTestCases.inc in ../../../llvm/include/llvm/Demangle/../Testing/Demangle; are you sure? 
[y/N]y Copying ./ItaniumDemangle.h to ../../../llvm/include/llvm/Demangle/ItaniumDemangle.h Copying ./ItaniumNodes.def to ../../../llvm/include/llvm/Demangle/ItaniumNodes.def Copying ./StringViewExtras.h to ../../../llvm/include/llvm/Demangle/StringViewExtras.h Copying ./Utility.h to ../../../llvm/include/llvm/Demangle/Utility.h Copying ../../test/DemangleTestCases.inc to ../../../llvm/include/llvm/Demangle/../Testing/Demangle/DemangleTestCases.inc ``` Luckily there weren't any out-of-sync changes introduced in the meantime. --- libcxxabi/src/demangle/cp-to-llvm.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libcxxabi/src/demangle/cp-to-llvm.sh b/libcxxabi/src/demangle/cp-to-llvm.sh index f773dff9f0a8b..9c1db6fec29a6 100755 --- a/libcxxabi/src/demangle/cp-to-llvm.sh +++ b/libcxxabi/src/demangle/cp-to-llvm.sh @@ -42,6 +42,7 @@ copy_files() { chmod -w $dst/README.txt for I in $hdrs ; do + echo "Copying ${src}/$I to ${dst}/$I" rm -f $dst/$I dash=$(echo "$I---------------------------" | cut -c -27 |\ sed 's|[^-]*||') @@ -53,6 +54,6 @@ copy_files() { } if [[ $ANSWER =~ ^[Yy]$ ]]; then - copy_files . $LLVM_DEMANGLE_DIR $HDRS - copy_files ../../test $LLVM_TESTING_DIR $TEST_HDRS + copy_files . $LLVM_DEMANGLE_DIR "${HDRS}" + copy_files ../../test $LLVM_TESTING_DIR "${TEST_HDRS}" fi From 3c162ba247d30c9d8113e66fe5d96e24156ce797 Mon Sep 17 00:00:00 2001 From: nerix Date: Wed, 5 Nov 2025 19:07:44 +0100 Subject: [PATCH 38/61] [LLDB][NativePDB] Add non-overlapping fields in root struct (#166243) When anonymous unions are used in a struct or vice versa, their fields are merged into the parent record when using PDB. LLDB tries to recreate the original definition of the record _with_ the anonymous unions/structs. 
For tagged unions (like `std::optional`) where the tag followed the anonymous union, the result was suboptimal: ```cpp // input: struct Foo { union { Bar b; char c; }; bool tag; }; // reconstructed: struct Foo { union { Bar b; struct { char c; bool tag; }; }; }; ``` Once the algorithm is in some nested union, it can't get out. In the above case, we can get to the correct reconstructed record if we always add fields that don't overlap others in the root struct. So when we see `tag`, we'll see that it comes after all other fields, so it's possible to add it in the root `Foo`. --- .../NativePDB/UdtRecordCompleter.cpp | 23 +++++++- .../SymbolFile/NativePDB/class_layout.cpp | 8 +-- .../NativePDB/UdtRecordCompleterTests.cpp | 52 ++++++++++++++++++- 3 files changed, 76 insertions(+), 7 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp index 1c575e90bd72c..46cf9b8524ede 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp @@ -442,6 +442,10 @@ void UdtRecordCompleter::Record::ConstructRecord() { // The end offset to a vector of field/struct that ends at the offset. std::map> end_offset_map; + auto is_last_end_offset = [&](auto it) { + return it != end_offset_map.end() && ++it == end_offset_map.end(); + }; + for (auto &pair : fields_map) { uint64_t offset = pair.first; auto &fields = pair.second; @@ -462,8 +466,23 @@ void UdtRecordCompleter::Record::ConstructRecord() { } if (iter->second.empty()) continue; - parent = iter->second.back(); - iter->second.pop_back(); + + // If the new fields come after the already added ones + // without overlap, go back to the root. 
+ if (iter->first <= offset && is_last_end_offset(iter)) { + if (record.kind == Member::Struct) { + parent = &record; + } else { + assert(record.kind == Member::Union && + "Current record must be a union"); + assert(!record.fields.empty()); + // For unions, append the field to the last struct + parent = record.fields.back().get(); + } + } else { + parent = iter->second.back(); + iter->second.pop_back(); + } } // If it's a field, then the field is inside a union, so we can safely // increase its size by converting it to a struct to hold multiple fields. diff --git a/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp b/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp index 36bfdb9a8e565..83ed533eb13e3 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp @@ -34,9 +34,6 @@ // CHECK-NEXT: s4 = { // CHECK-NEXT: x = ([0] = 67, [1] = 68, [2] = 99) // CHECK-NEXT: } -// CHECK-NEXT: s1 = { -// CHECK-NEXT: x = ([0] = 69, [1] = 70, [2] = 71) -// CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -47,6 +44,9 @@ // CHECK-NEXT: c2 = 'D' // CHECK-NEXT: } // CHECK-NEXT: } +// CHECK-NEXT: s1 = { +// CHECK-NEXT: x = ([0] = 69, [1] = 70, [2] = 71) +// CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: (lldb) type lookup C // CHECK-NEXT: struct C { @@ -63,7 +63,6 @@ // CHECK-NEXT: struct { // CHECK-NEXT: char c4; // CHECK-NEXT: S3 s4; -// CHECK-NEXT: S3 s1; // CHECK-NEXT: }; // CHECK-NEXT: }; // CHECK-NEXT: }; @@ -72,6 +71,7 @@ // CHECK-NEXT: char c2; // CHECK-NEXT: }; // CHECK-NEXT: }; +// CHECK-NEXT: S3 s1; // CHECK-NEXT: } diff --git a/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp b/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp index 17284b61b9a6e..cd6db5fcb1f4c 100644 --- a/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp +++ b/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp @@ -99,7 +99,7 @@ Member *AddField(Member *member, 
StringRef name, uint64_t byte_offset, std::make_unique(name, byte_offset * 8, byte_size * 8, clang::QualType(), lldb::eAccessPublic, 0); field->kind = kind; - field->base_offset = base_offset; + field->base_offset = base_offset * 8; member->fields.push_back(std::move(field)); return member->fields.back().get(); } @@ -111,6 +111,9 @@ TEST_F(UdtRecordCompleterRecordTests, TestAnonymousUnionInStruct) { CollectMember("m2", 0, 4); CollectMember("m3", 0, 1); CollectMember("m4", 0, 8); + CollectMember("m5", 8, 8); + CollectMember("m6", 16, 4); + CollectMember("m7", 16, 8); ConstructRecord(); // struct { @@ -120,6 +123,11 @@ TEST_F(UdtRecordCompleterRecordTests, TestAnonymousUnionInStruct) { // m3; // m4; // }; + // m5; + // union { + // m6; + // m7; + // }; // }; Record record; record.start_offset = 0; @@ -128,6 +136,10 @@ TEST_F(UdtRecordCompleterRecordTests, TestAnonymousUnionInStruct) { AddField(u, "m2", 0, 4, Member::Field); AddField(u, "m3", 0, 1, Member::Field); AddField(u, "m4", 0, 8, Member::Field); + AddField(&record.record, "m5", 8, 8, Member::Field); + Member *u2 = AddField(&record.record, "", 16, 0, Member::Union); + AddField(u2, "m6", 16, 4, Member::Field); + AddField(u2, "m7", 16, 8, Member::Field); EXPECT_EQ(WrappedRecord(this->record), WrappedRecord(record)); } @@ -243,3 +255,41 @@ TEST_F(UdtRecordCompleterRecordTests, TestNestedUnionStructInUnion) { AddField(s2, "m4", 2, 4, Member::Field); EXPECT_EQ(WrappedRecord(this->record), WrappedRecord(record)); } + +TEST_F(UdtRecordCompleterRecordTests, TestNestedStructInUnionInStructInUnion) { + SetKind(Member::Kind::Union); + CollectMember("m1", 0, 4); + CollectMember("m2", 0, 2); + CollectMember("m3", 0, 2); + CollectMember("m4", 2, 4); + CollectMember("m5", 6, 2); + CollectMember("m6", 6, 2); + CollectMember("m7", 8, 2); + ConstructRecord(); + + // union { + // m1; + // m2; + // struct { + // m3; + // m4; + // union { + // m5; + // m6; + // }; + // m7; + // }; + // }; + Record record; + record.start_offset = 0; 
+ AddField(&record.record, "m1", 0, 4, Member::Field); + AddField(&record.record, "m2", 0, 2, Member::Field); + Member *s = AddField(&record.record, "", 0, 0, Member::Struct); + AddField(s, "m3", 0, 2, Member::Field); + AddField(s, "m4", 2, 4, Member::Field); + Member *u = AddField(s, "", 6, 0, Member::Union); + AddField(u, "m5", 6, 2, Member::Field); + AddField(u, "m6", 6, 2, Member::Field); + AddField(s, "m7", 8, 2, Member::Field); + EXPECT_EQ(WrappedRecord(this->record), WrappedRecord(record)); +} From af0b6b18a8690e98586f342d28bb97a29c0eb45d Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Wed, 5 Nov 2025 19:16:03 +0100 Subject: [PATCH 39/61] [ProfCheck][NFC] fix argument order for call to setExplicitlyUnknownBranchWeightsIfProfiled (#166601) --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index bd62b1646c761..d9bc042d6807e 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1691,7 +1691,7 @@ Value *AtomicExpandImpl::insertRMWCmpXchgLoop( // Atomic RMW expands to a cmpxchg loop, Since precise branch weights // cannot be easily determined here, we mark the branch as "unknown" (50/50) // to prevent misleading optimizations. - setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return NewLoaded; From ebeb36b12e4649954a62dfbef7a5b04c5d8e52d7 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 5 Nov 2025 13:22:00 -0500 Subject: [PATCH 40/61] [PowerPC] Implement vsx rotate left word instr (#160754) Implement `xvrlw`. 
--- llvm/lib/Target/PowerPC/PPCInstrFuture.td | 3 +++ llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt | 3 +++ .../MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt | 3 +++ llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s | 4 ++++ 4 files changed, 13 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 0c2e44e18f463..dfbbba0116f25 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -420,6 +420,9 @@ let Predicates = [HasVSX, IsISAFuture] in { : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), "vucmprlh $VRT, $VRA, $VRB", []>; + def XVRLW: XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvrlw $XT, $XA, $XB", []>; + // AES Acceleration Instructions def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp), (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M), diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index 2661ed5b04cc9..b27a50d93f5b9 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -250,6 +250,9 @@ #CHECK: vucmprhh 1, 3, 6 0x10,0x23,0x31,0x03 +#CHECK: xvrlw 34, 15, 16 +0xf0,0x4f,0x85,0xc1 + #CHECK: xxaes192encp 8, 10, 14 0xf1,0x0b,0x76,0x10 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index 7fb8254ced0ac..72662d9736740 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -244,6 +244,9 @@ #CHECK: vucmprhh 1, 3, 6 0x03,0x31,0x23,0x10 +#CHECK: xvrlw 34, 15, 16 +0xc1,0x85,0x4f,0xf0 + #CHECK: xxaes192encp 8, 10, 14 0x10,0x76,0x0b,0xf1 diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s 
b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index 40059c440b128..ab72649fc3404 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -355,6 +355,10 @@ #CHECK-BE: vucmprhh 1, 3, 6 # encoding: [0x10,0x23,0x31,0x03] #CHECK-LE: vucmprhh 1, 3, 6 # encoding: [0x03,0x31,0x23,0x10] + xvrlw 34, 15, 16 +#CHECK-BE: xvrlw 34, 15, 16 # encoding: [0xf0,0x4f,0x85,0xc1] +#CHECK-LE: xvrlw 34, 15, 16 # encoding: [0xc1,0x85,0x4f,0xf0] + xxaes192encp 8, 10, 14 #CHECK-BE: xxaes192encp 8, 10, 14 # encoding: [0xf1,0x0b,0x76,0x10] #CHECK-LE: xxaes192encp 8, 10, 14 # encoding: [0x10,0x76,0x0b,0xf1] From d3caae1c07c297a5765d0498faf43f4730f71466 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 5 Nov 2025 10:24:56 -0800 Subject: [PATCH 41/61] [clang] Refactor clang's keyword enable/disable mechanism to allow lldb to re-use it (#165323) lldb's CPlusPlusNameParser is currently identifying keywords using its own map implemented using clang/Basic/TokenKinds.def. However, it does not respect the language options so identifiers can be incorrectly determined to be keywords when using languages in which they are not keywords. Rather than implement the logic to enable/disable keywords in both projects it makes sense for lldb to use clang's implementation.
See #164284 for more information --- clang/include/clang/Basic/IdentifierTable.h | 51 +++++++++++++++++++ clang/lib/Basic/IdentifierTable.cpp | 55 +-------------------- 2 files changed, 52 insertions(+), 54 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index e4044bcdfcc60..b27492d19a65b 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -46,6 +46,57 @@ class LangOptions; class MultiKeywordSelector; class SourceLocation; +/// Constants for TokenKinds.def +enum TokenKey : unsigned { + KEYC99 = 0x1, + KEYCXX = 0x2, + KEYCXX11 = 0x4, + KEYGNU = 0x8, + KEYMS = 0x10, + BOOLSUPPORT = 0x20, + KEYALTIVEC = 0x40, + KEYNOCXX = 0x80, + KEYBORLAND = 0x100, + KEYOPENCLC = 0x200, + KEYC23 = 0x400, + KEYNOMS18 = 0x800, + KEYNOOPENCL = 0x1000, + WCHARSUPPORT = 0x2000, + HALFSUPPORT = 0x4000, + CHAR8SUPPORT = 0x8000, + KEYOBJC = 0x10000, + KEYZVECTOR = 0x20000, + KEYCOROUTINES = 0x40000, + KEYMODULES = 0x80000, + KEYCXX20 = 0x100000, + KEYOPENCLCXX = 0x200000, + KEYMSCOMPAT = 0x400000, + KEYSYCL = 0x800000, + KEYCUDA = 0x1000000, + KEYZOS = 0x2000000, + KEYNOZOS = 0x4000000, + KEYHLSL = 0x8000000, + KEYFIXEDPOINT = 0x10000000, + KEYMAX = KEYFIXEDPOINT, // The maximum key + KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, + KEYALL = (KEYMAX | (KEYMAX - 1)) & ~KEYNOMS18 & ~KEYNOOPENCL & + ~KEYNOZOS // KEYNOMS18, KEYNOOPENCL, KEYNOZOS are excluded. +}; + +/// How a keyword is treated in the selected standard. This enum is ordered +/// intentionally so that the value that 'wins' is the most 'permissive'. +enum KeywordStatus { + KS_Unknown, // Not yet calculated. Used when figuring out the status. + KS_Disabled, // Disabled + KS_Future, // Is a keyword in future standard + KS_Extension, // Is an extension + KS_Enabled, // Enabled +}; + +/// Translates flags as specified in TokenKinds.def into keyword status +/// in the given language standard. 
+KeywordStatus getKeywordStatus(const LangOptions &LangOpts, unsigned Flags); + enum class ReservedIdentifierStatus { NotReserved = 0, StartsWithUnderscoreAtGlobalScope, diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4a2b77cd16bfc..d1c959b9687c4 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -77,57 +77,6 @@ IdentifierTable::IdentifierTable(const LangOptions &LangOpts, // Language Keyword Implementation //===----------------------------------------------------------------------===// -// Constants for TokenKinds.def -namespace { - -enum TokenKey : unsigned { - KEYC99 = 0x1, - KEYCXX = 0x2, - KEYCXX11 = 0x4, - KEYGNU = 0x8, - KEYMS = 0x10, - BOOLSUPPORT = 0x20, - KEYALTIVEC = 0x40, - KEYNOCXX = 0x80, - KEYBORLAND = 0x100, - KEYOPENCLC = 0x200, - KEYC23 = 0x400, - KEYNOMS18 = 0x800, - KEYNOOPENCL = 0x1000, - WCHARSUPPORT = 0x2000, - HALFSUPPORT = 0x4000, - CHAR8SUPPORT = 0x8000, - KEYOBJC = 0x10000, - KEYZVECTOR = 0x20000, - KEYCOROUTINES = 0x40000, - KEYMODULES = 0x80000, - KEYCXX20 = 0x100000, - KEYOPENCLCXX = 0x200000, - KEYMSCOMPAT = 0x400000, - KEYSYCL = 0x800000, - KEYCUDA = 0x1000000, - KEYZOS = 0x2000000, - KEYNOZOS = 0x4000000, - KEYHLSL = 0x8000000, - KEYFIXEDPOINT = 0x10000000, - KEYMAX = KEYFIXEDPOINT, // The maximum key - KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, - KEYALL = (KEYMAX | (KEYMAX - 1)) & ~KEYNOMS18 & ~KEYNOOPENCL & - ~KEYNOZOS // KEYNOMS18, KEYNOOPENCL, KEYNOZOS are excluded. -}; - -/// How a keyword is treated in the selected standard. This enum is ordered -/// intentionally so that the value that 'wins' is the most 'permissive'. -enum KeywordStatus { - KS_Unknown, // Not yet calculated. Used when figuring out the status. 
- KS_Disabled, // Disabled - KS_Future, // Is a keyword in future standard - KS_Extension, // Is an extension - KS_Enabled, // Enabled -}; - -} // namespace - // This works on a single TokenKey flag and checks the LangOpts to get the // KeywordStatus based exclusively on this flag, so that it can be merged in // getKeywordStatus. Most should be enabled/disabled, but some might imply @@ -220,9 +169,7 @@ static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts, } } -/// Translates flags as specified in TokenKinds.def into keyword status -/// in the given language standard. -static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, +KeywordStatus clang::getKeywordStatus(const LangOptions &LangOpts, unsigned Flags) { // KEYALL means always enabled, so special case this one. if (Flags == KEYALL) return KS_Enabled; From bc55f4f4f2b4ef196cf3ec25f69dfbd9cd032237 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 5 Nov 2025 10:32:38 -0800 Subject: [PATCH 42/61] [debugserver] Fix debugserver build on < macOS 10.15 (#166599) The VM_MEMORY_SANITIZER constant was added in macOS 10.15 and friends. Support using the constant on older OSes. Fixes #156144 --- lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp index 9d0d60fdaaed9..c8dce75af05eb 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp +++ b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp @@ -14,6 +14,12 @@ #include "DNBLog.h" #include #include +#include + +// From , but not on older OSs.
+#ifndef VM_MEMORY_SANITIZER +#define VM_MEMORY_SANITIZER 99 +#endif MachVMRegion::MachVMRegion(task_t task) : m_task(task), m_addr(INVALID_NUB_ADDRESS), m_err(), From c193eea86e9f0111e15df62343813857e306b779 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 18:39:01 +0000 Subject: [PATCH 43/61] [gn build] Port 056d2c12f756 --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 444670212cafb..eb41df208941a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -88,6 +88,7 @@ static_library("CodeGen") { "LatencyPriorityQueue.cpp", "LazyMachineBlockFrequencyInfo.cpp", "LexicalScopes.cpp", + "LibcallLoweringInfo.cpp", "LiveDebugValues/InstrRefBasedImpl.cpp", "LiveDebugValues/LiveDebugValues.cpp", "LiveDebugValues/VarLocBasedImpl.cpp", From 120689e46679c6db37cd9e839ec0721e80a22d4f Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 5 Nov 2025 11:02:28 -0800 Subject: [PATCH 44/61] [libc] Migrate ctype_utils to use char instead of int where applicable. (#166225) Functions like isalpha / tolower can operate on chars internally. This allows us to get rid of unnecessary casts and open a way to creating wchar_t overloads with the same names (e.g. for isalpha), that would simplify templated code for conversion functions (see 315dfe5865962d8a3d60e21d1fffce5214fe54ef). Add the int->char conversion to public entrypoints implementation instead. We also need to introduce bounds check on the input argument values - these functions' behavior is unspecified if the argument is neither EOF nor fits in "unsigned char" range, but the tests we've had verified that they always return false for small negative values. To preserve this behavior, cover it explicitly.
--- libc/src/__support/ctype_utils.h | 24 +++++++++---------- libc/src/__support/integer_to_string.h | 5 ++-- libc/src/ctype/CMakeLists.txt | 23 ++++++++++++++++++ libc/src/ctype/isalnum.cpp | 7 ++++-- libc/src/ctype/isalnum_l.cpp | 7 ++++-- libc/src/ctype/isalpha.cpp | 5 +++- libc/src/ctype/isalpha_l.cpp | 5 +++- libc/src/ctype/isdigit.cpp | 6 ++++- libc/src/ctype/isdigit_l.cpp | 6 ++++- libc/src/ctype/isgraph.cpp | 5 +++- libc/src/ctype/isgraph_l.cpp | 5 +++- libc/src/ctype/islower.cpp | 7 ++++-- libc/src/ctype/islower_l.cpp | 7 ++++-- libc/src/ctype/ispunct.cpp | 5 +++- libc/src/ctype/ispunct_l.cpp | 5 +++- libc/src/ctype/isspace.cpp | 7 ++++-- libc/src/ctype/isspace_l.cpp | 7 ++++-- libc/src/ctype/isupper.cpp | 7 ++++-- libc/src/ctype/isupper_l.cpp | 7 ++++-- libc/src/ctype/isxdigit.cpp | 7 ++++-- libc/src/ctype/isxdigit_l.cpp | 7 ++++-- libc/src/ctype/tolower.cpp | 11 +++++++-- libc/src/ctype/tolower_l.cpp | 9 +++++-- libc/src/ctype/toupper.cpp | 11 +++++++-- libc/src/ctype/toupper_l.cpp | 9 +++++-- .../printf_core/float_dec_converter_limited.h | 4 ++-- .../stdio/printf_core/float_hex_converter.h | 7 +++--- libc/src/stdlib/l64a.cpp | 6 ++--- libc/src/string/strcasestr.cpp | 4 ++-- libc/src/strings/strcasecmp.cpp | 4 ++-- libc/src/strings/strcasecmp_l.cpp | 4 ++-- libc/src/strings/strncasecmp.cpp | 4 ++-- libc/src/strings/strncasecmp_l.cpp | 4 ++-- libc/test/UnitTest/MemoryMatcher.cpp | 4 ++-- libc/test/src/ctype/islower_test.cpp | 2 +- libc/test/src/stdlib/StrtolTest.h | 24 +++++++++---------- .../llvm-project-overlay/libc/BUILD.bazel | 11 +++++++++ 37 files changed, 196 insertions(+), 86 deletions(-) diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index be0f25330af9e..61b7a0aeb5b67 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -27,7 +27,7 @@ namespace internal { // as well as a way to support non-ASCII character encodings. // Similarly, do not change these functions to use case ranges. 
e.g. -// bool islower(int ch) { +// bool islower(char ch) { // switch(ch) { // case 'a'...'z': // return true; @@ -37,7 +37,7 @@ namespace internal { // EBCDIC. Technically we could use some smaller ranges, but that's even harder // to read. -LIBC_INLINE static constexpr bool islower(int ch) { +LIBC_INLINE static constexpr bool islower(char ch) { switch (ch) { case 'a': case 'b': @@ -71,7 +71,7 @@ LIBC_INLINE static constexpr bool islower(int ch) { } } -LIBC_INLINE static constexpr bool isupper(int ch) { +LIBC_INLINE static constexpr bool isupper(char ch) { switch (ch) { case 'A': case 'B': @@ -105,7 +105,7 @@ LIBC_INLINE static constexpr bool isupper(int ch) { } } -LIBC_INLINE static constexpr bool isdigit(int ch) { +LIBC_INLINE static constexpr bool isdigit(char ch) { switch (ch) { case '0': case '1': @@ -123,7 +123,7 @@ LIBC_INLINE static constexpr bool isdigit(int ch) { } } -LIBC_INLINE static constexpr int tolower(int ch) { +LIBC_INLINE static constexpr char tolower(char ch) { switch (ch) { case 'A': return 'a'; @@ -182,7 +182,7 @@ LIBC_INLINE static constexpr int tolower(int ch) { } } -LIBC_INLINE static constexpr int toupper(int ch) { +LIBC_INLINE static constexpr char toupper(char ch) { switch (ch) { case 'a': return 'A'; @@ -241,7 +241,7 @@ LIBC_INLINE static constexpr int toupper(int ch) { } } -LIBC_INLINE static constexpr bool isalpha(int ch) { +LIBC_INLINE static constexpr bool isalpha(char ch) { switch (ch) { case 'a': case 'b': @@ -301,7 +301,7 @@ LIBC_INLINE static constexpr bool isalpha(int ch) { } } -LIBC_INLINE static constexpr bool isalnum(int ch) { +LIBC_INLINE static constexpr bool isalnum(char ch) { switch (ch) { case 'a': case 'b': @@ -371,7 +371,7 @@ LIBC_INLINE static constexpr bool isalnum(int ch) { } } -LIBC_INLINE static constexpr int b36_char_to_int(int ch) { +LIBC_INLINE static constexpr int b36_char_to_int(char ch) { switch (ch) { case '0': return 0; @@ -476,7 +476,7 @@ LIBC_INLINE static constexpr int b36_char_to_int(int ch) { } } 
-LIBC_INLINE static constexpr int int_to_b36_char(int num) { +LIBC_INLINE static constexpr char int_to_b36_char(int num) { // Can't actually use LIBC_ASSERT here because it depends on integer_to_string // which depends on this. @@ -559,7 +559,7 @@ LIBC_INLINE static constexpr int int_to_b36_char(int num) { } } -LIBC_INLINE static constexpr bool isspace(int ch) { +LIBC_INLINE static constexpr bool isspace(char ch) { switch (ch) { case ' ': case '\t': @@ -574,7 +574,7 @@ LIBC_INLINE static constexpr bool isspace(int ch) { } // not yet encoding independent. -LIBC_INLINE static constexpr bool isgraph(int ch) { +LIBC_INLINE static constexpr bool isgraph(char ch) { return 0x20 < ch && ch < 0x7f; } diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 29449bd739730..5e7369de00962 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -378,9 +378,8 @@ template class IntegerToString { using UNSIGNED_T = make_integral_or_big_int_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { - const int result = internal::int_to_b36_char(digit); - return static_cast(Fmt::IS_UPPERCASE ? internal::toupper(result) - : result); + const char result = internal::int_to_b36_char(digit); + return Fmt::IS_UPPERCASE ? 
internal::toupper(result) : result; } LIBC_INLINE static void diff --git a/libc/src/ctype/CMakeLists.txt b/libc/src/ctype/CMakeLists.txt index 8830c1bccf9ea..68e982bd4529e 100644 --- a/libc/src/ctype/CMakeLists.txt +++ b/libc/src/ctype/CMakeLists.txt @@ -6,6 +6,7 @@ add_entrypoint_object( isalnum.h DEPENDS libc.include.ctype + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -16,6 +17,7 @@ add_entrypoint_object( HDRS isalpha.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -50,6 +52,7 @@ add_entrypoint_object( HDRS isdigit.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -60,6 +63,7 @@ add_entrypoint_object( HDRS isgraph.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -70,6 +74,7 @@ add_entrypoint_object( HDRS islower.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -88,6 +93,7 @@ add_entrypoint_object( HDRS ispunct.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -97,6 +103,9 @@ add_entrypoint_object( isspace.cpp HDRS isspace.h + DEPENDS + libc.src.__support.CPP.limits + libc.src.__support.ctype_utils ) add_entrypoint_object( @@ -106,6 +115,7 @@ add_entrypoint_object( HDRS isupper.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -116,6 +126,7 @@ add_entrypoint_object( HDRS isxdigit.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -126,6 +137,7 @@ add_entrypoint_object( HDRS tolower.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -144,6 +156,7 @@ add_entrypoint_object( HDRS toupper.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -160,6 +173,7 @@ add_entrypoint_object( isalnum_l.h DEPENDS libc.include.ctype + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -171,6 +185,7 @@ add_entrypoint_object( HDRS isalpha_l.h DEPENDS + 
libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -202,6 +217,7 @@ add_entrypoint_object( HDRS isdigit_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -224,6 +240,7 @@ add_entrypoint_object( HDRS islower_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -257,6 +274,8 @@ add_entrypoint_object( isspace_l.h DEPENDS libc.hdr.types.locale_t + libc.src.__support.CPP.limits + libc.src.__support.ctype_utils ) add_entrypoint_object( @@ -266,6 +285,7 @@ add_entrypoint_object( HDRS isupper_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -277,6 +297,7 @@ add_entrypoint_object( HDRS isxdigit_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -288,6 +309,7 @@ add_entrypoint_object( HDRS tolower_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -299,6 +321,7 @@ add_entrypoint_object( HDRS toupper_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) diff --git a/libc/src/ctype/isalnum.cpp b/libc/src/ctype/isalnum.cpp index 54a3e35748879..102b5e79e4a18 100644 --- a/libc/src/ctype/isalnum.cpp +++ b/libc/src/ctype/isalnum.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isalnum.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalnum, (int c)) { - return static_cast(internal::isalnum(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalnum(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff 
--git a/libc/src/ctype/isalnum_l.cpp b/libc/src/ctype/isalnum_l.cpp index 671d9b75c4c33..173e1c174121e 100644 --- a/libc/src/ctype/isalnum_l.cpp +++ b/libc/src/ctype/isalnum_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isalnum_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalnum_l, (int c, locale_t)) { - return static_cast(internal::isalnum(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalnum(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isalpha.cpp b/libc/src/ctype/isalpha.cpp index 78b26f6a486ea..7c874bf373866 100644 --- a/libc/src/ctype/isalpha.cpp +++ b/libc/src/ctype/isalpha.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isalpha.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalpha, (int c)) { - return static_cast(internal::isalpha(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalpha(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isalpha_l.cpp b/libc/src/ctype/isalpha_l.cpp index 0619d979bedf2..982bcc569faaf 100644 --- a/libc/src/ctype/isalpha_l.cpp +++ b/libc/src/ctype/isalpha_l.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isalpha_l.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalpha_l, (int c, locale_t)) { - return 
static_cast(internal::isalpha(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalpha(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isdigit.cpp b/libc/src/ctype/isdigit.cpp index 1f711943861f8..43553c794a2f3 100644 --- a/libc/src/ctype/isdigit.cpp +++ b/libc/src/ctype/isdigit.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isdigit.h" + +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -14,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isdigit, (int c)) { - return static_cast(internal::isdigit(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isdigit(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isdigit_l.cpp b/libc/src/ctype/isdigit_l.cpp index ca981362bfe83..40b5618906dac 100644 --- a/libc/src/ctype/isdigit_l.cpp +++ b/libc/src/ctype/isdigit_l.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isdigit_l.h" + +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -14,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isdigit_l, (int c, locale_t)) { - return static_cast(internal::isdigit(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isdigit(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isgraph.cpp b/libc/src/ctype/isgraph.cpp index 74bb2e75d138e..b9308ecb7367c 100644 --- a/libc/src/ctype/isgraph.cpp +++ b/libc/src/ctype/isgraph.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isgraph.h" +#include 
"src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isgraph, (int c)) { - return static_cast(internal::isgraph(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isgraph(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isgraph_l.cpp b/libc/src/ctype/isgraph_l.cpp index cbef6df148aed..dddcb9be4f80c 100644 --- a/libc/src/ctype/isgraph_l.cpp +++ b/libc/src/ctype/isgraph_l.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isgraph_l.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isgraph_l, (int c, locale_t)) { - return static_cast(internal::isgraph(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isgraph(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/islower.cpp b/libc/src/ctype/islower.cpp index 831aad32d3a22..920bfc1cc1a59 100644 --- a/libc/src/ctype/islower.cpp +++ b/libc/src/ctype/islower.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/islower.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, islower, (int c)) { - return static_cast(internal::islower(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::islower(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/islower_l.cpp 
b/libc/src/ctype/islower_l.cpp index b9be6acc81c99..da97026dc59a7 100644 --- a/libc/src/ctype/islower_l.cpp +++ b/libc/src/ctype/islower_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/islower_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, islower_l, (int c, locale_t)) { - return static_cast(internal::islower(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::islower(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/ispunct.cpp b/libc/src/ctype/ispunct.cpp index 0635294220b9c..4950036e9b81f 100644 --- a/libc/src/ctype/ispunct.cpp +++ b/libc/src/ctype/ispunct.cpp @@ -8,6 +8,7 @@ #include "src/ctype/ispunct.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, ispunct, (int c)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(!internal::isalnum(ch) && internal::isgraph(ch)); } diff --git a/libc/src/ctype/ispunct_l.cpp b/libc/src/ctype/ispunct_l.cpp index e825fbe2001b0..79cd47b6a214d 100644 --- a/libc/src/ctype/ispunct_l.cpp +++ b/libc/src/ctype/ispunct_l.cpp @@ -8,6 +8,7 @@ #include "src/ctype/ispunct_l.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, ispunct_l, (int c, locale_t)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > 
cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(!internal::isalnum(ch) && internal::isgraph(ch)); } diff --git a/libc/src/ctype/isspace.cpp b/libc/src/ctype/isspace.cpp index 005bf460fc103..998dbf28f51d0 100644 --- a/libc/src/ctype/isspace.cpp +++ b/libc/src/ctype/isspace.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isspace.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isspace, (int c)) { - return static_cast(internal::isspace(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isspace(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isspace_l.cpp b/libc/src/ctype/isspace_l.cpp index 5c46dd6805126..e40765326b35e 100644 --- a/libc/src/ctype/isspace_l.cpp +++ b/libc/src/ctype/isspace_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isspace_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isspace_l, (int c, locale_t)) { - return static_cast(internal::isspace(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isspace(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isupper.cpp b/libc/src/ctype/isupper.cpp index 965fa336b28b4..c5c3dbd5d7d4a 100644 --- a/libc/src/ctype/isupper.cpp +++ b/libc/src/ctype/isupper.cpp @@ -7,15 +7,18 @@ 
//===----------------------------------------------------------------------===// #include "src/ctype/isupper.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isupper, (int c)) { - return static_cast(internal::isupper(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isupper(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isupper_l.cpp b/libc/src/ctype/isupper_l.cpp index 358990261d603..44ed9dab90a16 100644 --- a/libc/src/ctype/isupper_l.cpp +++ b/libc/src/ctype/isupper_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isupper_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isupper_l, (int c, locale_t)) { - return static_cast(internal::isupper(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isupper(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isxdigit.cpp b/libc/src/ctype/isxdigit.cpp index 81f645c6f49fc..1b2e71769b3f8 100644 --- a/libc/src/ctype/isxdigit.cpp +++ b/libc/src/ctype/isxdigit.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isxdigit.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) { - const 
unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(internal::isalnum(ch) && internal::b36_char_to_int(ch) < 16); } diff --git a/libc/src/ctype/isxdigit_l.cpp b/libc/src/ctype/isxdigit_l.cpp index eddfd20a2da3b..e6150473b0043 100644 --- a/libc/src/ctype/isxdigit_l.cpp +++ b/libc/src/ctype/isxdigit_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isxdigit_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(internal::isalnum(ch) && internal::b36_char_to_int(ch) < 16); } diff --git a/libc/src/ctype/tolower.cpp b/libc/src/ctype/tolower.cpp index 3ecad7bc5d5d5..b45c5f2688a61 100644 --- a/libc/src/ctype/tolower.cpp +++ b/libc/src/ctype/tolower.cpp @@ -7,13 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/tolower.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, tolower, (int c)) { return internal::tolower(c); } +LLVM_LIBC_FUNCTION(int, tolower, (int c)) { + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::tolower(static_cast(c))); +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/tolower_l.cpp b/libc/src/ctype/tolower_l.cpp index 7ccf31617e592..049e46aea13c0 100644 --- 
a/libc/src/ctype/tolower_l.cpp +++ b/libc/src/ctype/tolower_l.cpp @@ -7,15 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/tolower_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tolower_l, (int c, locale_t)) { - return internal::tolower(c); + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::tolower(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper.cpp b/libc/src/ctype/toupper.cpp index 1e1e8fc400711..0e387238ce3b6 100644 --- a/libc/src/ctype/toupper.cpp +++ b/libc/src/ctype/toupper.cpp @@ -7,13 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/toupper.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); } +LLVM_LIBC_FUNCTION(int, toupper, (int c)) { + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::toupper(static_cast(c))); +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper_l.cpp b/libc/src/ctype/toupper_l.cpp index a435ca1ab5d41..d1dff262c9377 100644 --- a/libc/src/ctype/toupper_l.cpp +++ b/libc/src/ctype/toupper_l.cpp @@ -7,15 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/toupper_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include 
"src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) { - return internal::toupper(c); + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::toupper(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/float_dec_converter_limited.h b/libc/src/stdio/printf_core/float_dec_converter_limited.h index 9cdc13573d320..0f85d0a8d26b4 100644 --- a/libc/src/stdio/printf_core/float_dec_converter_limited.h +++ b/libc/src/stdio/printf_core/float_dec_converter_limited.h @@ -363,8 +363,8 @@ DigitsOutput decimal_digits(DigitsInput input, int precision, bool e_mode) { // we made it from and doing the decimal conversion all over again.) for (size_t i = output.ndigits; i-- > 0;) { if (output.digits[i] != '9') { - output.digits[i] = static_cast(internal::int_to_b36_char( - internal::b36_char_to_int(output.digits[i]) + 1)); + output.digits[i] = internal::int_to_b36_char( + internal::b36_char_to_int(output.digits[i]) + 1); break; } else { output.digits[i] = '0'; diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 16592e7bac932..9b57f1d803e74 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -137,9 +137,9 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t first_non_zero = 1; for (; mant_cur > 0; --mant_cur, mantissa >>= 4) { char mant_mod_16 = static_cast(mantissa % 16); - char new_digit = static_cast(internal::int_to_b36_char(mant_mod_16)); + char new_digit = internal::int_to_b36_char(mant_mod_16); if (internal::isupper(to_conv.conv_name)) - new_digit = static_cast(internal::toupper(new_digit)); + new_digit = internal::toupper(new_digit); mant_buffer[mant_cur - 1] = new_digit; if (new_digit != '0' && first_non_zero < mant_cur) 
first_non_zero = mant_cur; @@ -167,8 +167,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t exp_cur = EXP_LEN; for (; exponent > 0; --exp_cur, exponent /= 10) { - exp_buffer[exp_cur - 1] = - static_cast(internal::int_to_b36_char(exponent % 10)); + exp_buffer[exp_cur - 1] = internal::int_to_b36_char(exponent % 10); } if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0. exp_buffer[EXP_LEN - 1] = '0'; diff --git a/libc/src/stdlib/l64a.cpp b/libc/src/stdlib/l64a.cpp index d59e65e7dc4c2..d8fe8ef86bf7d 100644 --- a/libc/src/stdlib/l64a.cpp +++ b/libc/src/stdlib/l64a.cpp @@ -32,15 +32,13 @@ constexpr static char b64_int_to_char(uint32_t num) { if (num == 1) return '/'; if (num < 38) - return static_cast( - internal::toupper(internal::int_to_b36_char(num - 2))); + return internal::toupper(internal::int_to_b36_char(num - 2)); // this tolower is technically unnecessary, but it provides safety if we // change the default behavior of int_to_b36_char. Also the compiler // completely elides it so there's no performance penalty, see: // https://godbolt.org/z/o5ennv7fc - return static_cast( - internal::tolower(internal::int_to_b36_char(num - 2 - 26))); + return internal::tolower(internal::int_to_b36_char(num - 2 - 26)); } // This function takes a long and converts the low 32 bits of it into at most 6 diff --git a/libc/src/string/strcasestr.cpp b/libc/src/string/strcasestr.cpp index de8e4bec7fe0b..575d6bed16d11 100644 --- a/libc/src/string/strcasestr.cpp +++ b/libc/src/string/strcasestr.cpp @@ -21,8 +21,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(char *, strcasestr, (const char *haystack, const char *needle)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; LIBC_CRASH_ON_NULLPTR(haystack); diff --git a/libc/src/strings/strcasecmp.cpp 
b/libc/src/strings/strcasecmp.cpp index 4bbe2909df1e2..4518647deabe4 100644 --- a/libc/src/strings/strcasecmp.cpp +++ b/libc/src/strings/strcasecmp.cpp @@ -17,8 +17,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strcasecmp, (const char *left, const char *right)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strcmp(left, right, case_cmp); } diff --git a/libc/src/strings/strcasecmp_l.cpp b/libc/src/strings/strcasecmp_l.cpp index 95117cb27a564..d77f95637a396 100644 --- a/libc/src/strings/strcasecmp_l.cpp +++ b/libc/src/strings/strcasecmp_l.cpp @@ -18,8 +18,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strcasecmp_l, (const char *left, const char *right, locale_t)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strcmp(left, right, case_cmp); } diff --git a/libc/src/strings/strncasecmp.cpp b/libc/src/strings/strncasecmp.cpp index 9c2f0ab131269..a5926495a3e22 100644 --- a/libc/src/strings/strncasecmp.cpp +++ b/libc/src/strings/strncasecmp.cpp @@ -18,8 +18,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strncasecmp, (const char *left, const char *right, size_t n)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strncmp(left, right, n, case_cmp); } diff --git a/libc/src/strings/strncasecmp_l.cpp b/libc/src/strings/strncasecmp_l.cpp index 91ac7e5e89107..a828f609fd9e8 100644 --- 
a/libc/src/strings/strncasecmp_l.cpp +++ b/libc/src/strings/strncasecmp_l.cpp @@ -18,8 +18,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strncasecmp_l, (const char *left, const char *right, size_t n, locale_t)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strncmp(left, right, n, case_cmp); } diff --git a/libc/test/UnitTest/MemoryMatcher.cpp b/libc/test/UnitTest/MemoryMatcher.cpp index 6e375768e9333..405f226798f7a 100644 --- a/libc/test/UnitTest/MemoryMatcher.cpp +++ b/libc/test/UnitTest/MemoryMatcher.cpp @@ -41,8 +41,8 @@ bool MemoryMatcher::match(MemoryView actualValue) { static void display(char C) { const auto print = [](unsigned char i) { - tlog << static_cast(LIBC_NAMESPACE::internal::toupper( - LIBC_NAMESPACE::internal::int_to_b36_char(i))); + tlog << LIBC_NAMESPACE::internal::toupper( + LIBC_NAMESPACE::internal::int_to_b36_char(i)); }; print(static_cast(C) / 16); print(static_cast(C) & 15); diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp index f877171abb9a3..e4e5f5cefd954 100644 --- a/libc/test/src/ctype/islower_test.cpp +++ b/libc/test/src/ctype/islower_test.cpp @@ -40,7 +40,7 @@ TEST(LlvmLibcIsLower, SimpleTest) { } TEST(LlvmLibcIsLower, DefaultLocale) { - // Loops through all characters, verifying that numbers and letters + // Loops through all characters, verifying that only lowercase letters // return non-zero integer and everything else returns a zero. 
for (int ch = -255; ch < 255; ++ch) { if (in_span(ch, LOWER_ARRAY)) diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h index 03f0a6539c785..3a7da1fa85ac7 100644 --- a/libc/test/src/stdlib/StrtolTest.h +++ b/libc/test/src/stdlib/StrtolTest.h @@ -177,8 +177,8 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { char small_string[4] = {'\0', '\0', '\0', '\0'}; for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); if (first_digit < base) { ASSERT_EQ(func(small_string, nullptr, base), static_cast(first_digit)); @@ -192,11 +192,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); if (first_digit < base && second_digit < base) { ASSERT_EQ( func(small_string, nullptr, base), @@ -216,14 +216,14 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - 
LIBC_NAMESPACE::internal::int_to_b36_char(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); for (int third_digit = 0; third_digit <= limit; ++third_digit) { - small_string[2] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(third_digit)); + small_string[2] = + LIBC_NAMESPACE::internal::int_to_b36_char(third_digit); if (first_digit < base && second_digit < base && third_digit < base) { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 8d225d63cdf3e..9c14a7b991487 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1859,6 +1859,7 @@ libc_function( hdrs = ["src/ctype/isalnum.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1869,6 +1870,7 @@ libc_function( hdrs = ["src/ctype/isalpha.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1909,6 +1911,7 @@ libc_function( hdrs = ["src/ctype/isdigit.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1919,6 +1922,7 @@ libc_function( hdrs = ["src/ctype/isgraph.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1929,6 +1933,7 @@ libc_function( hdrs = ["src/ctype/islower.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1949,6 +1954,7 @@ libc_function( hdrs = ["src/ctype/ispunct.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1959,6 +1965,7 @@ libc_function( hdrs = ["src/ctype/isspace.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1969,6 +1976,7 @@ libc_function( hdrs = ["src/ctype/isupper.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1979,6 +1987,7 @@ 
libc_function( hdrs = ["src/ctype/isxdigit.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1999,6 +2008,7 @@ libc_function( hdrs = ["src/ctype/tolower.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -2009,6 +2019,7 @@ libc_function( hdrs = ["src/ctype/toupper.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) From e7f7973899f76773ae6e9a6b1e8c7e9f9cc5cb56 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 5 Nov 2025 11:03:25 -0800 Subject: [PATCH 45/61] [libc] Migrate wctype_utils to use wchar_t where applicable. (#166234) This is a counterpart of https://github.com/llvm/llvm-project/pull/166225 but for wctype_utils (which are not yet widely used). For now, I'm just changing the types from wint_t to wchar_t to match the regular ctype_utils change. The next change may rename most of the functions to match the name of ctype_utils variants, so that we could be calling them from the templated code operating on "const char*" and "const wchar_t*" strings, and the right function signature would be picked up. 
--- libc/src/__support/CMakeLists.txt | 1 + libc/src/__support/wctype_utils.h | 23 +++++++++--------- libc/src/wctype/iswalpha.cpp | 4 +++- libc/test/src/wchar/WcstolTest.h | 24 +++++++++---------- .../llvm-project-overlay/libc/BUILD.bazel | 1 + 5 files changed, 29 insertions(+), 24 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index b7af751ec3f27..96874702b1fdf 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -161,6 +161,7 @@ add_header_library( HDRS wctype_utils.h DEPENDS + libc.hdr.types.wchar_t libc.hdr.types.wint_t ) diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h index 2ae5ec93b2a63..60b6afb928475 100644 --- a/libc/src/__support/wctype_utils.h +++ b/libc/src/__support/wctype_utils.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H #define LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H +#include "hdr/types/wchar_t.h" #include "hdr/types/wint_t.h" #include "src/__support/CPP/optional.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE @@ -30,7 +31,7 @@ namespace internal { // functions, make sure you have benchmarks to show your new solution is faster, // as well as a way to support non-ASCII character encodings. // Similarly, do not change these functions to use case ranges. e.g. -// bool iswlower(wint_t ch) { +// bool iswlower(wchar_t ch) { // switch(ch) { // case L'a'...L'z': // return true; @@ -40,7 +41,7 @@ namespace internal { // EBCDIC. Technically we could use some smaller ranges, but that's even harder // to read.
-LIBC_INLINE static constexpr bool iswlower(wint_t wch) { +LIBC_INLINE static constexpr bool iswlower(wchar_t wch) { switch (wch) { case L'a': case L'b': @@ -74,7 +75,7 @@ LIBC_INLINE static constexpr bool iswlower(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswupper(wint_t wch) { +LIBC_INLINE static constexpr bool iswupper(wchar_t wch) { switch (wch) { case L'A': case L'B': @@ -108,7 +109,7 @@ LIBC_INLINE static constexpr bool iswupper(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswdigit(wint_t wch) { +LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) { switch (wch) { case L'0': case L'1': @@ -126,7 +127,7 @@ LIBC_INLINE static constexpr bool iswdigit(wint_t wch) { } } -LIBC_INLINE static constexpr wint_t towlower(wint_t wch) { +LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) { switch (wch) { case L'A': return L'a'; @@ -185,7 +186,7 @@ LIBC_INLINE static constexpr wint_t towlower(wint_t wch) { } } -LIBC_INLINE static constexpr wint_t towupper(wint_t wch) { +LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) { switch (wch) { case L'a': return L'A'; @@ -244,7 +245,7 @@ LIBC_INLINE static constexpr wint_t towupper(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswalpha(wint_t wch) { +LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) { switch (wch) { case L'a': case L'b': @@ -304,7 +305,7 @@ LIBC_INLINE static constexpr bool iswalpha(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswalnum(wint_t wch) { +LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) { switch (wch) { case L'a': case L'b': @@ -374,7 +375,7 @@ LIBC_INLINE static constexpr bool iswalnum(wint_t wch) { } } -LIBC_INLINE static constexpr int b36_wchar_to_int(wint_t wch) { +LIBC_INLINE static constexpr int b36_wchar_to_int(wchar_t wch) { switch (wch) { case L'0': return 0; @@ -479,7 +480,7 @@ LIBC_INLINE static constexpr int b36_wchar_to_int(wint_t wch) { } } -LIBC_INLINE static constexpr wint_t int_to_b36_wchar(int num) { +LIBC_INLINE 
static constexpr wchar_t int_to_b36_wchar(int num) { // Can't actually use LIBC_ASSERT here because it depends on integer_to_string // which depends on this. @@ -562,7 +563,7 @@ LIBC_INLINE static constexpr wint_t int_to_b36_wchar(int num) { } } -LIBC_INLINE static constexpr bool iswspace(wint_t wch) { +LIBC_INLINE static constexpr bool iswspace(wchar_t wch) { switch (wch) { case L' ': case L'\t': diff --git a/libc/src/wctype/iswalpha.cpp b/libc/src/wctype/iswalpha.cpp index 09f55d391dbff..e151363b88d0b 100644 --- a/libc/src/wctype/iswalpha.cpp +++ b/libc/src/wctype/iswalpha.cpp @@ -14,6 +14,8 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) { return internal::iswalpha(c); } +LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) { + return internal::iswalpha(static_cast(c)); +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/wchar/WcstolTest.h b/libc/test/src/wchar/WcstolTest.h index 4d5b752e62238..cadf9e0c42b90 100644 --- a/libc/test/src/wchar/WcstolTest.h +++ b/libc/test/src/wchar/WcstolTest.h @@ -178,8 +178,8 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { wchar_t small_string[4] = {L'\0', L'\0', L'\0', L'\0'}; for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit); if (first_digit < base) { ASSERT_EQ(func(small_string, nullptr, base), static_cast(first_digit)); @@ -193,11 +193,11 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit); for (int second_digit = 0; second_digit <= 36; 
++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit); if (first_digit < base && second_digit < base) { ASSERT_EQ( func(small_string, nullptr, base), @@ -217,14 +217,14 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit); for (int third_digit = 0; third_digit <= limit; ++third_digit) { - small_string[2] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(third_digit)); + small_string[2] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(third_digit); if (first_digit < base && second_digit < base && third_digit < base) { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 9c14a7b991487..b65fe64acdea0 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1805,6 +1805,7 @@ libc_support_library( ":__support_cpp_optional", ":__support_macros_attributes", ":__support_macros_config", + ":types_wchar_t", ":types_wint_t", ], ) From 37fff6e17ee29e790f850f6e133d14a73c08a0f8 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 5 Nov 2025 11:06:22 -0800 Subject: [PATCH 46/61] [NFC][LLVM][IR] Cleanup namespace usage in LLVM IR cpp files (#166477) --- llvm/lib/IR/AsmWriter.cpp | 6 +----- llvm/lib/IR/DebugLoc.cpp | 6 ++---- llvm/lib/IR/DebugProgramInstruction.cpp | 6 +++--- 
llvm/lib/IR/FPEnv.cpp | 16 ++++++++-------- llvm/lib/IR/Operator.cpp | 4 ++-- llvm/lib/IR/PassTimingInfo.cpp | 12 +++++------- llvm/lib/IR/PseudoProbe.cpp | 12 ++++-------- llvm/lib/IR/ReplaceConstant.cpp | 12 +++++------- llvm/lib/IR/Use.cpp | 4 +--- llvm/lib/IR/User.cpp | 5 +++-- llvm/lib/IR/Verifier.cpp | 6 +----- 11 files changed, 35 insertions(+), 54 deletions(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 95d954f6b8174..0c8565c927a24 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -758,14 +758,12 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { AbstractSlotTrackerStorage::~AbstractSlotTrackerStorage() = default; -namespace llvm { - //===----------------------------------------------------------------------===// // SlotTracker Class: Enumerate slot numbers for unnamed values //===----------------------------------------------------------------------===// /// This class provides computation of slot numbers for LLVM Assembly writing. /// -class SlotTracker : public AbstractSlotTrackerStorage { +class llvm::SlotTracker : public AbstractSlotTrackerStorage { public: /// ValueMap - A mapping of Values to slot numbers. 
using ValueMap = DenseMap; @@ -943,8 +941,6 @@ class SlotTracker : public AbstractSlotTrackerStorage { void processDbgRecordMetadata(const DbgRecord &DVR); }; -} // end namespace llvm - ModuleSlotTracker::ModuleSlotTracker(SlotTracker &Machine, const Module *M, const Function *F) : M(M), F(F), Machine(&Machine) {} diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index 01dafcab94ce9..bfba6e0cab6bf 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -10,10 +10,11 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfo.h" +using namespace llvm; + #if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN #include "llvm/Support/Signals.h" -namespace llvm { DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) { if (!ShouldCollectTrace) return; @@ -30,11 +31,8 @@ void DbgLocOrigin::addTrace() { auto &[Depth, StackTrace] = StackTraces.emplace_back(); Depth = sys::getStackTrace(StackTrace); } -} // namespace llvm #endif -using namespace llvm; - #if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) : TrackingMDNodeRef(const_cast(L)), DbgLocOrigin(!L), diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp index d9357bba75510..6b1fd3907dc41 100644 --- a/llvm/lib/IR/DebugProgramInstruction.cpp +++ b/llvm/lib/IR/DebugProgramInstruction.cpp @@ -12,8 +12,9 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Compiler.h" -namespace llvm { +using namespace llvm; +namespace llvm { template DbgRecordParamRef::DbgRecordParamRef(const T *Param) : Ref(const_cast(Param)) {} @@ -28,6 +29,7 @@ template T *DbgRecordParamRef::get() const { template class LLVM_EXPORT_TEMPLATE DbgRecordParamRef; template class LLVM_EXPORT_TEMPLATE DbgRecordParamRef; template class LLVM_EXPORT_TEMPLATE DbgRecordParamRef; +} // namespace llvm DbgVariableRecord::DbgVariableRecord(const DbgVariableIntrinsic *DVI) : DbgRecord(ValueKind, DVI->getDebugLoc()), @@ -755,5 +757,3 @@ 
iterator_range::iterator> DbgMarker::cloneDebugInfoFrom( // We inserted a block at the end, return that range. return {First->getIterator(), StoredDbgRecords.end()}; } - -} // end namespace llvm diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp index 67f21d3756e93..c41d7b3181a37 100644 --- a/llvm/lib/IR/FPEnv.cpp +++ b/llvm/lib/IR/FPEnv.cpp @@ -19,9 +19,10 @@ #include "llvm/IR/Intrinsics.h" #include -namespace llvm { +using namespace llvm; -std::optional convertStrToRoundingMode(StringRef RoundingArg) { +std::optional +llvm::convertStrToRoundingMode(StringRef RoundingArg) { // For dynamic rounding mode, we use round to nearest but we will set the // 'exact' SDNodeFlag so that the value will not be rounded. return StringSwitch>(RoundingArg) @@ -34,7 +35,8 @@ std::optional convertStrToRoundingMode(StringRef RoundingArg) { .Default(std::nullopt); } -std::optional convertRoundingModeToStr(RoundingMode UseRounding) { +std::optional +llvm::convertRoundingModeToStr(RoundingMode UseRounding) { std::optional RoundingStr; switch (UseRounding) { case RoundingMode::Dynamic: @@ -62,7 +64,7 @@ std::optional convertRoundingModeToStr(RoundingMode UseRounding) { } std::optional -convertStrToExceptionBehavior(StringRef ExceptionArg) { +llvm::convertStrToExceptionBehavior(StringRef ExceptionArg) { return StringSwitch>(ExceptionArg) .Case("fpexcept.ignore", fp::ebIgnore) .Case("fpexcept.maytrap", fp::ebMayTrap) @@ -71,7 +73,7 @@ convertStrToExceptionBehavior(StringRef ExceptionArg) { } std::optional -convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { +llvm::convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { std::optional ExceptStr; switch (UseExcept) { case fp::ebStrict: @@ -87,7 +89,7 @@ convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { return ExceptStr; } -Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr) { +Intrinsic::ID llvm::getConstrainedIntrinsicID(const Instruction &Instr) { Intrinsic::ID IID = 
Intrinsic::not_intrinsic; switch (Instr.getOpcode()) { case Instruction::FCmp: @@ -127,5 +129,3 @@ Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr) { return IID; } - -} // namespace llvm diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 39e5463cb6fc3..c3e54a0fc0c7e 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -17,7 +17,8 @@ #include "ConstantsContext.h" -namespace llvm { +using namespace llvm; + bool Operator::hasPoisonGeneratingFlags() const { switch (getOpcode()) { case Instruction::Add: @@ -288,4 +289,3 @@ void FastMathFlags::print(raw_ostream &O) const { O << " afn"; } } -} // namespace llvm diff --git a/llvm/lib/IR/PassTimingInfo.cpp b/llvm/lib/IR/PassTimingInfo.cpp index 4e27086e97ac5..cb1b91a98b036 100644 --- a/llvm/lib/IR/PassTimingInfo.cpp +++ b/llvm/lib/IR/PassTimingInfo.cpp @@ -32,10 +32,10 @@ using namespace llvm; #define DEBUG_TYPE "time-passes" -namespace llvm { +using namespace llvm; -bool TimePassesIsEnabled = false; -bool TimePassesPerRun = false; +bool llvm::TimePassesIsEnabled = false; +bool llvm::TimePassesPerRun = false; static cl::opt EnableTiming( "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden, @@ -139,7 +139,7 @@ PassTimingInfo *PassTimingInfo::TheTimeInfo; } // namespace legacy } // namespace -Timer *getPassTimer(Pass *P) { +Timer *llvm::getPassTimer(Pass *P) { legacy::PassTimingInfo::init(); if (legacy::PassTimingInfo::TheTimeInfo) return legacy::PassTimingInfo::TheTimeInfo->getPassTimer(P, P); @@ -148,7 +148,7 @@ Timer *getPassTimer(Pass *P) { /// If timing is enabled, report the times collected up to now and then reset /// them. 
-void reportAndResetTimings(raw_ostream *OutStream) { +void llvm::reportAndResetTimings(raw_ostream *OutStream) { if (legacy::PassTimingInfo::TheTimeInfo) legacy::PassTimingInfo::TheTimeInfo->print(OutStream); } @@ -315,5 +315,3 @@ void TimePassesHandler::registerCallbacks(PassInstrumentationCallbacks &PIC) { PIC.registerAfterAnalysisCallback( [this](StringRef P, Any) { this->stopAnalysisTimer(P); }); } - -} // namespace llvm diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp index 59f218cc3683b..3c05f4b1f86a2 100644 --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -19,9 +19,7 @@ using namespace llvm; -namespace llvm { - -std::optional +static std::optional extractProbeFromDiscriminator(const DILocation *DIL) { if (DIL) { auto Discriminator = DIL->getDiscriminator(); @@ -43,7 +41,7 @@ extractProbeFromDiscriminator(const DILocation *DIL) { return std::nullopt; } -std::optional +static std::optional extractProbeFromDiscriminator(const Instruction &Inst) { assert(isa(&Inst) && !isa(&Inst) && "Only call instructions should have pseudo probe encodes as their " @@ -53,7 +51,7 @@ extractProbeFromDiscriminator(const Instruction &Inst) { return std::nullopt; } -std::optional extractProbe(const Instruction &Inst) { +std::optional llvm::extractProbe(const Instruction &Inst) { if (const auto *II = dyn_cast(&Inst)) { PseudoProbe Probe; Probe.Id = II->getIndex()->getZExtValue(); @@ -73,7 +71,7 @@ std::optional extractProbe(const Instruction &Inst) { return std::nullopt; } -void setProbeDistributionFactor(Instruction &Inst, float Factor) { +void llvm::setProbeDistributionFactor(Instruction &Inst, float Factor) { assert(Factor >= 0 && Factor <= 1 && "Distribution factor must be in [0, 1.0]"); if (auto *II = dyn_cast(&Inst)) { @@ -111,5 +109,3 @@ void setProbeDistributionFactor(Instruction &Inst, float Factor) { } } } - -} // namespace llvm diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index 
962368f061851..b3586b45a23f2 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -16,7 +16,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" -namespace llvm { +using namespace llvm; static bool isExpandableUser(User *U) { return isa(U) || isa(U); @@ -49,10 +49,10 @@ static SmallVector expandUser(BasicBlock::iterator InsertPt, return NewInsts; } -bool convertUsersOfConstantsToInstructions(ArrayRef Consts, - Function *RestrictToFunc, - bool RemoveDeadConstants, - bool IncludeSelf) { +bool llvm::convertUsersOfConstantsToInstructions(ArrayRef Consts, + Function *RestrictToFunc, + bool RemoveDeadConstants, + bool IncludeSelf) { // Find all expandable direct users of Consts. SmallVector Stack; for (Constant *C : Consts) { @@ -121,5 +121,3 @@ bool convertUsersOfConstantsToInstructions(ArrayRef Consts, return Changed; } - -} // namespace llvm diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp index 67882ba0144b4..504233575594d 100644 --- a/llvm/lib/IR/Use.cpp +++ b/llvm/lib/IR/Use.cpp @@ -9,7 +9,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" -namespace llvm { +using namespace llvm; void Use::swap(Use &RHS) { if (Val == RHS.Val) @@ -42,5 +42,3 @@ void Use::zap(Use *Start, const Use *Stop, bool del) { if (del) ::operator delete(Start); } - -} // namespace llvm diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp index ab44cb4b8a3f7..9bb7c1298593a 100644 --- a/llvm/lib/IR/User.cpp +++ b/llvm/lib/IR/User.cpp @@ -11,8 +11,11 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IntrinsicInst.h" +using namespace llvm; + namespace llvm { class BasicBlock; +} //===----------------------------------------------------------------------===// // User Class @@ -214,5 +217,3 @@ LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void User::operator delete(void *Usr) { ::operator delete(Storage); } } - -} // namespace llvm diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 7917712846990..24f90bf6de7f5 100644 --- 
a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -136,9 +136,7 @@ static cl::opt VerifyNoAliasScopeDomination( cl::desc("Ensure that llvm.experimental.noalias.scope.decl for identical " "scopes are not dominating")); -namespace llvm { - -struct VerifierSupport { +struct llvm::VerifierSupport { raw_ostream *OS; const Module &M; ModuleSlotTracker MST; @@ -318,8 +316,6 @@ struct VerifierSupport { } }; -} // namespace llvm - namespace { class Verifier : public InstVisitor, VerifierSupport { From 00171b352def8afa314c89a090501e890326fb34 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 5 Nov 2025 11:06:57 -0800 Subject: [PATCH 47/61] [NFC][TableGen] Adopt CodeGenHelpers in SDNodeInfoEmitter (#165622) Use `IfDefEmitter` and `NamespaceEmitter` in SDNodeInfoEmitter. --- .../TableGen/SDNodeInfoEmitter/no-nodes.td | 1 + llvm/utils/TableGen/SDNodeInfoEmitter.cpp | 23 +++++++------------ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/no-nodes.td b/llvm/test/TableGen/SDNodeInfoEmitter/no-nodes.td index 0c5c63db4c95b..cc0f87755cdc2 100644 --- a/llvm/test/TableGen/SDNodeInfoEmitter/no-nodes.td +++ b/llvm/test/TableGen/SDNodeInfoEmitter/no-nodes.td @@ -20,6 +20,7 @@ def MyTarget : Target; // CHECK-EMPTY: // CHECK-NEXT: namespace llvm { // CHECK-EMPTY: +// CHECK-EMPTY: // CHECK-NEXT: #ifdef __GNUC__ // CHECK-NEXT: #pragma GCC diagnostic push // CHECK-NEXT: #pragma GCC diagnostic ignored "-Woverlength-strings" diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp index 64f03dae83e7d..dd18d29e6c676 100644 --- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp +++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp @@ -10,6 +10,7 @@ #include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo. 
#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/TableGen/CodeGenHelpers.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" @@ -129,9 +130,8 @@ SDNodeInfoEmitter::SDNodeInfoEmitter(const RecordKeeper &RK) } void SDNodeInfoEmitter::emitEnum(raw_ostream &OS) const { - OS << "#ifdef GET_SDNODE_ENUM\n"; - OS << "#undef GET_SDNODE_ENUM\n\n"; - OS << "namespace llvm::" << TargetSDNodeNamespace << " {\n\n"; + IfDefEmitter IfDef(OS, "GET_SDNODE_ENUM"); + NamespaceEmitter NS(OS, "llvm::" + TargetSDNodeNamespace); if (!NodesByName.empty()) { StringRef FirstName = NodesByName.begin()->first; @@ -145,14 +145,11 @@ void SDNodeInfoEmitter::emitEnum(raw_ostream &OS) const { OS << "};\n\n"; OS << "static constexpr unsigned GENERATED_OPCODE_END = " << LastName - << " + 1;\n\n"; + << " + 1;\n"; } else { OS << "static constexpr unsigned GENERATED_OPCODE_END = " - "ISD::BUILTIN_OP_END;\n\n"; + "ISD::BUILTIN_OP_END;\n"; } - - OS << "} // namespace llvm::" << TargetSDNodeNamespace << "\n\n"; - OS << "#endif // GET_SDNODE_ENUM\n\n"; } std::vector SDNodeInfoEmitter::emitNodeNames(raw_ostream &OS) const { @@ -324,9 +321,8 @@ static void emitDesc(raw_ostream &OS, StringRef EnumName, void SDNodeInfoEmitter::emitDescs(raw_ostream &OS) const { StringRef TargetName = Target.getName(); - OS << "#ifdef GET_SDNODE_DESC\n"; - OS << "#undef GET_SDNODE_DESC\n\n"; - OS << "namespace llvm {\n"; + IfDefEmitter IfDef(OS, "GET_SDNODE_DESC"); + NamespaceEmitter NS(OS, "llvm"); std::vector NameOffsets = emitNodeNames(OS); std::vector> ConstraintOffsetsAndCounts = @@ -343,11 +339,8 @@ void SDNodeInfoEmitter::emitDescs(raw_ostream &OS) const { OS << formatv("static const SDNodeInfo {0}GenSDNodeInfo(\n" " /*NumOpcodes=*/{1}, {0}SDNodeDescs,\n" - " {0}SDNodeNames, {0}SDTypeConstraints);\n\n", + " {0}SDNodeNames, {0}SDTypeConstraints);\n", TargetName, NodesByName.size()); - - OS << "} // 
namespace llvm\n\n"; - OS << "#endif // GET_SDNODE_DESC\n\n"; } void SDNodeInfoEmitter::run(raw_ostream &OS) const { From 28a279ce14f913df71546d8201d5363682a75901 Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Wed, 5 Nov 2025 19:07:34 +0000 Subject: [PATCH 48/61] [lldb-dap] expand tilde in dap executable path (#162635) Users may have multiple devices and would like to resolve the home path based on the machine they are on. Expands the tilde `~` character at the front of the given file path. --- .../lldb-dap/src-ts/debug-adapter-factory.ts | 6 ++- lldb/tools/lldb-dap/src-ts/utils.ts | 41 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 lldb/tools/lldb-dap/src-ts/utils.ts diff --git a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts index 7060638a94864..433d48fab9d85 100644 --- a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts +++ b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts @@ -6,6 +6,7 @@ import * as fs from "node:fs/promises"; import { ConfigureButton, OpenSettingsButton } from "./ui/show-error-message"; import { ErrorWithNotification } from "./ui/error-with-notification"; import { LogFilePathProvider, LogType } from "./logging"; +import { expandUser } from "./utils"; const exec = util.promisify(child_process.execFile); @@ -116,8 +117,9 @@ async function getDAPExecutable( configuration: vscode.DebugConfiguration, ): Promise { // Check if the executable was provided in the launch configuration. - const launchConfigPath = configuration["debugAdapterExecutable"]; + let launchConfigPath = configuration["debugAdapterExecutable"]; if (typeof launchConfigPath === "string" && launchConfigPath.length !== 0) { + launchConfigPath = expandUser(launchConfigPath); if (!(await isExecutable(launchConfigPath))) { throw new ErrorWithNotification( `Debug adapter path "${launchConfigPath}" is not a valid file.
The path comes from your launch configuration.`, @@ -129,7 +131,7 @@ async function getDAPExecutable( // Check if the executable was provided in the extension's configuration. const config = vscode.workspace.getConfiguration("lldb-dap", workspaceFolder); - const configPath = config.get("executable-path"); + const configPath = expandUser(config.get("executable-path") ?? ""); if (configPath && configPath.length !== 0) { if (!(await isExecutable(configPath))) { throw new ErrorWithNotification( diff --git a/lldb/tools/lldb-dap/src-ts/utils.ts b/lldb/tools/lldb-dap/src-ts/utils.ts new file mode 100644 index 0000000000000..efebe0b0f42ba --- /dev/null +++ b/lldb/tools/lldb-dap/src-ts/utils.ts @@ -0,0 +1,41 @@ +import * as os from "os"; +import * as path from "path"; + +/** + * Expands the character `~` to the user's home directory + */ +export function expandUser(file_path: string): string { + if (os.platform() == "win32") { + return file_path; + } + + if (!file_path) { + return ""; + } + + if (!file_path.startsWith("~")) { + return file_path; + } + + const path_len = file_path.length; + if (path_len == 1) { + return os.homedir(); + } + + if (file_path.charAt(1) == path.sep) { + return path.join(os.homedir(), file_path.substring(1)); + } + + const sep_index = file_path.indexOf(path.sep); + const user_name_end = sep_index == -1 ? file_path.length : sep_index; + const user_name = file_path.substring(1, user_name_end); + try { + if (user_name == os.userInfo().username) { + return path.join(os.homedir(), file_path.substring(user_name_end)); + } + } catch (err) { + return file_path; + } + + return file_path; +} From 3d0a3674d9ae52ed685ce467a48653cc27a2e5eb Mon Sep 17 00:00:00 2001 From: Marcell Leleszi <59964679+mleleszi@users.noreply.github.com> Date: Wed, 5 Nov 2025 20:17:13 +0100 Subject: [PATCH 49/61] [libc] Make errno asserts noop on gpu targets (#166606) This patch defines errno unit and integration test asserts as noop on GPU targets. 
Checking for errnos in tests has caused build breakages in previous patches. --- libc/test/IntegrationTest/CMakeLists.txt | 1 + libc/test/IntegrationTest/test.h | 7 +++++++ libc/test/UnitTest/CMakeLists.txt | 1 + libc/test/UnitTest/ErrnoCheckingTest.h | 7 +++++++ 4 files changed, 16 insertions(+) diff --git a/libc/test/IntegrationTest/CMakeLists.txt b/libc/test/IntegrationTest/CMakeLists.txt index 235e9fe2f55ee..d0752ea178429 100644 --- a/libc/test/IntegrationTest/CMakeLists.txt +++ b/libc/test/IntegrationTest/CMakeLists.txt @@ -14,5 +14,6 @@ add_object_library( libc.hdr.stdint_proxy libc.src.__support.OSUtil.osutil libc.src.__support.CPP.atomic + libc.src.__support.macros.properties.architectures ${arch_specific_deps} ) diff --git a/libc/test/IntegrationTest/test.h b/libc/test/IntegrationTest/test.h index 4a03f7aa6318b..9f5a3dfb3583c 100644 --- a/libc/test/IntegrationTest/test.h +++ b/libc/test/IntegrationTest/test.h @@ -11,6 +11,7 @@ #include "src/__support/OSUtil/exit.h" #include "src/__support/OSUtil/io.h" +#include "src/__support/macros/properties/architectures.h" #define __AS_STRING(val) #val #define __CHECK_TRUE(file, line, val, should_exit) \ @@ -68,9 +69,15 @@ //////////////////////////////////////////////////////////////////////////////// // Errno checks. +#ifdef LIBC_TARGET_ARCH_IS_GPU +#define ASSERT_ERRNO_EQ(VAL) +#define ASSERT_ERRNO_SUCCESS() +#define ASSERT_ERRNO_FAILURE() +#else #define ASSERT_ERRNO_EQ(VAL) ASSERT_EQ(VAL, static_cast(errno)) #define ASSERT_ERRNO_SUCCESS() ASSERT_EQ(0, static_cast(errno)) #define ASSERT_ERRNO_FAILURE() ASSERT_NE(0, static_cast(errno)) +#endif // Integration tests are compiled with -ffreestanding which stops treating // the main function as a non-overloadable special function.
Hence, we use a diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 31d1e9dce8204..3197b3d7fd01b 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -204,5 +204,6 @@ add_header_library( ErrnoCheckingTest.h DEPENDS libc.src.__support.common + libc.src.__support.macros.properties.architectures libc.src.errno.errno ) diff --git a/libc/test/UnitTest/ErrnoCheckingTest.h b/libc/test/UnitTest/ErrnoCheckingTest.h index 5b1bc9441d830..111d812c58612 100644 --- a/libc/test/UnitTest/ErrnoCheckingTest.h +++ b/libc/test/UnitTest/ErrnoCheckingTest.h @@ -11,11 +11,17 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" #include "test/UnitTest/Test.h" // Define macro to validate the value stored in the errno and restore it // to zero. +#ifdef LIBC_TARGET_ARCH_IS_GPU +#define ASSERT_ERRNO_EQ(VAL) +#define ASSERT_ERRNO_SUCCESS() +#define ASSERT_ERRNO_FAILURE() +#else #define ASSERT_ERRNO_EQ(VAL) \ do { \ ASSERT_EQ(VAL, static_cast(libc_errno)); \ @@ -27,6 +33,7 @@ ASSERT_NE(0, static_cast(libc_errno)); \ libc_errno = 0; \ } while (0) +#endif namespace LIBC_NAMESPACE_DECL { namespace testing { From e2d2affc70a8191ea67eee697e83ef4834c6b4a8 Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Wed, 5 Nov 2025 11:21:52 -0800 Subject: [PATCH 50/61] [AMDGPU][LowerBufferFatPointers] Fix crash with `select false` (#166471) If the input to LowerBufferFatPointers is such that the resource- and offset-specific `select` instructions generated for a `select` on `ptr addrspace(7)` fold away, the pass would crash when trying to replace an instruction with itself. This commit resolves the issue.
Fixes https://github.com/iree-org/iree/issues/22551 --- .../Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp | 7 +++++-- .../AMDGPU/lower-buffer-fat-pointers-control-flow.ll | 12 ++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 0a5913293238a..fdff21b6ef8df 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1565,8 +1565,11 @@ void SplitPtrStructs::processConditionals() { } else if (isa(I)) { if (MaybeRsrc) { if (auto *RsrcInst = dyn_cast(Rsrc)) { - ConditionalTemps.push_back(RsrcInst); - RsrcInst->replaceAllUsesWith(*MaybeRsrc); + // Guard against conditionals that were already folded away. + if (RsrcInst != *MaybeRsrc) { + ConditionalTemps.push_back(RsrcInst); + RsrcInst->replaceAllUsesWith(*MaybeRsrc); + } } for (Value *V : Seen) FoundRsrcs[V] = *MaybeRsrc; diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll index 4fa7c29bfde02..71005224dd1e5 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll @@ -481,3 +481,15 @@ define void @dominance_not_in_program_order(ptr addrspace(7) inreg %arg) { %lsr.iv11 = phi ptr addrspace(7) [ %arg, %.loopexit ], [ %arg, %.preheader15 ] br label %.loopexit } + +;; iree-org/iree#22551 - crash on something that reduces to the below non-canonical select. 
+define ptr addrspace(7) @noncanonical_const_cond(ptr addrspace(7) %x) { +; CHECK-LABEL: define { ptr addrspace(8), i32 } @noncanonical_const_cond +; CHECK-SAME: ({ ptr addrspace(8), i32 } [[RET:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 0 +; CHECK-NEXT: [[X_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]] +; + %ret = select i1 false, ptr addrspace(7) %x, ptr addrspace(7) %x + ret ptr addrspace(7) %ret +} From 1041423393ff64834df793a8bd982fa6c898d5d8 Mon Sep 17 00:00:00 2001 From: SKill Date: Wed, 5 Nov 2025 20:29:50 +0100 Subject: [PATCH 51/61] [clang][SourceManager] Reuse code when computing Column and Line numbers (#166593) --- clang/include/clang/Basic/SourceManager.h | 20 +++++++++++++---- clang/lib/Basic/SourceManager.cpp | 27 +++++++---------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 6d9d074d78026..bc9e97863556d 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -1409,10 +1409,15 @@ class SourceManager : public RefCountedBase { /// before calling this method. 
unsigned getColumnNumber(FileID FID, unsigned FilePos, bool *Invalid = nullptr) const; + unsigned getColumnNumber(SourceLocation Loc, bool *Invalid = nullptr) const; unsigned getSpellingColumnNumber(SourceLocation Loc, - bool *Invalid = nullptr) const; + bool *Invalid = nullptr) const { + return getColumnNumber(getSpellingLoc(Loc), Invalid); + } unsigned getExpansionColumnNumber(SourceLocation Loc, - bool *Invalid = nullptr) const; + bool *Invalid = nullptr) const { + return getColumnNumber(getExpansionLoc(Loc), Invalid); + } unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid = nullptr) const; @@ -1423,8 +1428,15 @@ class SourceManager : public RefCountedBase { /// MemoryBuffer, so this is not cheap: use only when about to emit a /// diagnostic. unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid = nullptr) const; - unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; - unsigned getExpansionLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; + unsigned getLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; + unsigned getSpellingLineNumber(SourceLocation Loc, + bool *Invalid = nullptr) const { + return getLineNumber(getSpellingLoc(Loc), Invalid); + } + unsigned getExpansionLineNumber(SourceLocation Loc, + bool *Invalid = nullptr) const { + return getLineNumber(getExpansionLoc(Loc), Invalid); + } unsigned getPresumedLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; /// Return the filename or buffer identifier of the buffer the diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 7dc81c50f87a2..b6cc6ec9365f5 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1159,17 +1159,11 @@ static bool isInvalid(LocType Loc, bool *Invalid) { return MyInvalid; } -unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc, - bool *Invalid) const { - if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset 
LocInfo = getDecomposedSpellingLoc(Loc); - return getColumnNumber(LocInfo.first, LocInfo.second, Invalid); -} - -unsigned SourceManager::getExpansionColumnNumber(SourceLocation Loc, - bool *Invalid) const { +unsigned SourceManager::getColumnNumber(SourceLocation Loc, + bool *Invalid) const { + assert(Loc.isFileID()); if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedExpansionLoc(Loc); + FileIDAndOffset LocInfo = getDecomposedLoc(Loc); return getColumnNumber(LocInfo.first, LocInfo.second, Invalid); } @@ -1367,18 +1361,13 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos, return LineNo; } -unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc, - bool *Invalid) const { - if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedSpellingLoc(Loc); - return getLineNumber(LocInfo.first, LocInfo.second); -} -unsigned SourceManager::getExpansionLineNumber(SourceLocation Loc, - bool *Invalid) const { +unsigned SourceManager::getLineNumber(SourceLocation Loc, bool *Invalid) const { + assert(Loc.isFileID()); if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedExpansionLoc(Loc); + FileIDAndOffset LocInfo = getDecomposedLoc(Loc); return getLineNumber(LocInfo.first, LocInfo.second); } + unsigned SourceManager::getPresumedLineNumber(SourceLocation Loc, bool *Invalid) const { PresumedLoc PLoc = getPresumedLoc(Loc); From db6231b4c2e18bb5fc107624e9c9071b02124844 Mon Sep 17 00:00:00 2001 From: Jun Wang Date: Wed, 5 Nov 2025 11:52:56 -0800 Subject: [PATCH 52/61] [AMDGPU][MC] GFX9 - Support NV bit in FLAT instructions in pre-GFX90A (#154237) targets This patch enables support of the NV (non-volatile) bit in FLAT instructions in GFX9 (pre-GFX90A) targets. 
--- llvm/lib/Target/AMDGPU/AMDGPU.td | 12 + .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 14 +- llvm/lib/Target/AMDGPU/FLATInstructions.td | 140 ++- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 8 +- llvm/test/MC/AMDGPU/gfx90a_err.s | 43 + llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s | 52 +- llvm/test/MC/AMDGPU/gfx942_err.s | 28 + llvm/test/MC/AMDGPU/gfx9_asm_flat.s | 858 +++++++++++++++++ .../test/MC/Disassembler/AMDGPU/gfx9_flat.txt | 864 ++++++++++++++++++ 9 files changed, 1941 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 54d94b1f8682e..4fe194c813c46 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2366,6 +2366,18 @@ def isGFX8GFX9NotGFX90A : " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; +// Pre-90A GFX9s allow the NV bit in FLAT instructions. +def isNVAllowedInFlat : + Predicate<"!Subtarget->hasGFX90AInsts() &&" + " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, + AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>; + +// GFX8 or GFX90A+ do not allow the NV bit in FLAT instructions. 
+def isNVNotAllowedInFlat : + Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||" + " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">, + AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>; + def isGFX90AOnly : Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 09338c533fdf2..2808c44c59c11 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1602,6 +1602,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } + bool isFlatInstAndNVAllowed(const MCInst &Inst) const { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A(); + } + AMDGPUTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast(TS); @@ -5370,7 +5375,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]); Error(S, "scale_offset is not supported on this GPU"); } - if (CPol & CPol::NV) { + if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]); @@ -7145,6 +7150,13 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { unsigned Enabled = 0, Seen = 0; for (;;) { SMLoc S = getLoc(); + + if (isGFX9() && trySkipId("nv")) { + Enabled |= CPol::NV; + Seen |= CPol::NV; + continue; + } + bool Disabling; unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); if (!CPol) diff 
--git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8ea64d17417f7..6ef224148e44b 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -125,7 +125,7 @@ class FLAT_Real op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : bits<7> saddr; bits<10> vdst; - bits<5> cpol; + bits<6> cpol; // Only valid on gfx9 bits<1> lds = ps.lds; // LDS DMA for global and scratch @@ -2693,29 +2693,52 @@ class FLAT_Real_vi op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); } +class FLAT_Real_vi_ex_gfx9 op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : + FLAT_Real_vi { + let AssemblerPredicate = isNVNotAllowedInFlat; +} + +class FLAT_Real_gfx9 op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : + FLAT_Real_vi { + let AssemblerPredicate = isNVAllowedInFlat; + let Subtarget = SIEncodingFamily.GFX9; + let DecoderNamespace = "GFX9"; + let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit. 
+} + +multiclass FLAT_Real_mc_vi op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> { + def _vi: FLAT_Real_vi_ex_gfx9; + def _gfx9: FLAT_Real_gfx9; +} + multiclass FLAT_Real_AllAddr_vi op, bit has_sccb = !cast(NAME).has_sccb> { - def _vi : FLAT_Real_vi(NAME), has_sccb>; - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb>; + defm "" : FLAT_Real_mc_vi(NAME), has_sccb>; + defm _SADDR : FLAT_Real_mc_vi(NAME#"_SADDR"), has_sccb>; +} + +multiclass FLAT_Real_AllAddr_vi_ex_gfx9 op, + bit has_sccb = !cast(NAME).has_sccb> { + def _vi : FLAT_Real_vi_ex_gfx9(NAME), has_sccb>; + def _SADDR_vi : FLAT_Real_vi_ex_gfx9(NAME#"_SADDR"), has_sccb>; } class FLAT_Real_gfx940 op, FLAT_Pseudo ps> : FLAT_Real , SIMCInstr { let AssemblerPredicate = isGFX940Plus; - let DecoderNamespace = "GFX9"; + let DecoderNamespace = "GFX940"; let Inst{13} = ps.sve; let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); } multiclass FLAT_Real_AllAddr_SVE_vi op> { - def _vi : FLAT_Real_vi(NAME)> { - let AssemblerPredicate = isGFX8GFX9NotGFX940; - let OtherPredicates = [isGFX8GFX9NotGFX940]; - } - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR")> { - let DecoderNamespace = "GFX9"; + let OtherPredicates = [isGFX8GFX9NotGFX940] in { + defm "" : FLAT_Real_mc_vi(NAME)>; } + + defm _SADDR_vi : FLAT_Real_mc_vi(NAME#"_SADDR")>; + let AssemblerPredicate = isGFX940Plus in { def _VE_gfx940 : FLAT_Real_gfx940(NAME)>; def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; @@ -2728,11 +2751,11 @@ multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx940_op, bit has_sccb = !cast(NAME).has_sccb> { let OtherPredicates = [isGFX8GFX9NotGFX940] in { - def _vi : FLAT_Real_vi(NAME), has_sccb> { - let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds"; + let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds" in { + defm "" : FLAT_Real_mc_vi(NAME), has_sccb>; } - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { - let AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; + let 
AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds" in { + defm _SADDR : FLAT_Real_mc_vi(NAME#"_SADDR"), has_sccb>; } } @@ -2748,47 +2771,66 @@ multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx940_op> { def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; } -def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; -def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; -def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; -def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; -def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; -def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; -def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; -def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; - -def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; -def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; -def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; -def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; -def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; -def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; -def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; -def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; - -def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; -def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; -def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; -def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; -def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; -def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; +defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>; +defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>; +defm FLAT_LOAD_USHORT_vi : 
FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>; +defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>; +defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>; +defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>; +defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>; +defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>; + +defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>; +defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>; +defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>; +defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; +defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>; +defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>; +defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>; +defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>; + +defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>; +defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; +defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>; +defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; +defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>; +defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; multiclass FLAT_Real_Atomics_vi op, bit has_sccb = !cast(NAME).has_sccb> { defvar ps = !cast(NAME); - def _vi : FLAT_Real_vi(ps.PseudoInstr), has_sccb>; - def _RTN_vi : FLAT_Real_vi(ps.PseudoInstr # "_RTN"), has_sccb>; - def _RTN_agpr_vi : FLAT_Real_vi(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; + defm "" : FLAT_Real_mc_vi(ps.PseudoInstr), has_sccb>; + defm _RTN : FLAT_Real_mc_vi(ps.PseudoInstr # "_RTN"), has_sccb>; + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; +} + +multiclass 
FLAT_Real_Atomics_vi_ex_gfx9 op, + bit has_sccb = !cast(NAME).has_sccb> { + defvar ps = !cast(NAME); + def _vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr), has_sccb>; + def _RTN_vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr # "_RTN"), has_sccb>; + + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; } multiclass FLAT_Global_Real_Atomics_vi op, bit has_sccb = !cast(NAME).has_sccb> : FLAT_Real_AllAddr_vi { - def _RTN_vi : FLAT_Real_vi (NAME#"_RTN"), has_sccb>; - def _SADDR_RTN_vi : FLAT_Real_vi (NAME#"_SADDR_RTN"), has_sccb>; + defm _RTN : FLAT_Real_mc_vi (NAME#"_RTN"), has_sccb>; + defm _SADDR_RTN : FLAT_Real_mc_vi (NAME#"_SADDR_RTN"), has_sccb>; + + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_RTN_agpr"), has_sccb>; + def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_SADDR_RTN_agpr"), has_sccb>; +} + +multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9 op, + bit has_sccb = !cast(NAME).has_sccb> : + FLAT_Real_AllAddr_vi_ex_gfx9 { + def _RTN_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_RTN"), has_sccb>; + def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_SADDR_RTN"), has_sccb>; - def _RTN_agpr_vi : FLAT_Real_vi (NAME#"_RTN_agpr"), has_sccb>; - def _SADDR_RTN_agpr_vi : FLAT_Real_vi (NAME#"_SADDR_RTN_agpr"), has_sccb>; + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_RTN_agpr"), has_sccb>; + def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_SADDR_RTN_agpr"), has_sccb>; } defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>; @@ -2950,10 +2992,10 @@ let AssemblerPredicate = isGFX940Plus in { defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; - defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; - defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; - defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; - defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; + defm 
FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>; + defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>; + defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_ex_gfx9<0x52>; + defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>; } // End AssemblerPredicate = isGFX940Plus //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 703ec0a4befa5..3e6f35dbf5e54 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -186,8 +186,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, O << " dlc"; if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc"); - if (Imm & ~CPol::ALL_pregfx12) - O << " /* unexpected cache policy bit */"; + if (Imm & ~CPol::ALL_pregfx12) { + if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI)) + O << " nv"; + else + O << " /* unexpected cache policy bit */"; + } } void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope, diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index ff0dfb371bbbf..78e4f86ec1b90 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -674,3 +674,46 @@ v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// nv bit in FLAT instructions +flat_load_ubyte v5, v[2:3] offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_load_ubyte a5, v[2:3] offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_store_dword v[2:3], v5 offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_store_dword v[2:3], a5 offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_load_ubyte v5, v[2:3], off offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_store_byte v[2:3], v5, off offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_add v[2:3], v5, off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap a1, v[2:3], a2, off glc nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap_x2 v[2:3], v[4:5], off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap_x2 v[2:3], a[4:5], off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_load_ubyte v5, off, s2 offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_load_ubyte a5, off, s2 offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_store_dword v2, v3, off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index c96a72ddc2573..3af0d83fb3056 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -706,107 +706,107 @@ 
flat_load_short_d16_hi a5, v[2:3] offset:4095 glc flat_load_short_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_swap a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_add a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x09,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_add a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_sub a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x0d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_sub a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x11,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smin a0, v[2:3], a2 offset:4095 glc // GFX90A: 
flat_atomic_umin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x15,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x19,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x1d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_and a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x21,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_and a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_or a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x25,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_or a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_xor a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x29,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: 
:[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_xor a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_inc a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x2d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_inc a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_dec a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x31,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_dec a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc // GFX90A: flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores 
not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x99,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x9d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU 
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa5,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa9,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xad,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xb1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: 
:[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s index fd59a01b34a04..dc51bab65aa04 100644 --- a/llvm/test/MC/AMDGPU/gfx942_err.s +++ b/llvm/test/MC/AMDGPU/gfx942_err.s @@ -125,3 +125,31 @@ global_load_dword v[2:3], off lds scratch_load_dword v2, off lds // GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +// nv bit in FLAT instructions +flat_load_ubyte v5, v[2:3] offset:4095 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_store_dword v[2:3], v5 offset:4095 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_atomic_add_f32 v[2:3], v5 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_load_dword v2, v[2:3], off sc0 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_store_dword v[2:3], v5 off sc0 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_add_f64 v[0:1], v[2:3], off sc1 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap v0, v[2:3], v5 off sc0 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_load_lds_dword v2, off nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_store_dword v2, v3, off nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s index 5cc3d2533a149..7687c0a478bd9 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s @@ -24,6 +24,18 @@ flat_load_ubyte v5, v[1:2] offset:4095 glc flat_load_ubyte 
v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] +flat_load_ubyte v5, v[1:2] nv +// CHECK: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] + flat_load_sbyte v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] @@ -48,6 +60,18 @@ flat_load_sbyte v5, v[1:2] offset:4095 glc flat_load_sbyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] +flat_load_sbyte v5, v[1:2] nv +// CHECK: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] + flat_load_ushort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] @@ -72,6 +96,18 @@ flat_load_ushort v5, v[1:2] offset:4095 glc flat_load_ushort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] +flat_load_ushort v5, v[1:2] nv +// CHECK: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ushort v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ushort v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ushort v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] + flat_load_sshort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] @@ -96,6 +132,18 @@ flat_load_sshort v5, v[1:2] offset:4095 glc flat_load_sshort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] 
+flat_load_sshort v5, v[1:2] nv +// CHECK: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sshort v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sshort v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sshort v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] + flat_load_dword v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] @@ -120,6 +168,18 @@ flat_load_dword v5, v[1:2] offset:4095 glc flat_load_dword v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] +flat_load_dword v5, v[1:2] nv +// CHECK: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dword v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dword v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dword v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] + flat_load_dwordx2 v[5:6], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] @@ -144,6 +204,18 @@ flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] +flat_load_dwordx2 v[5:6], v[1:2] nv +// CHECK: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] + flat_load_dwordx3 v[5:7], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] @@ -168,6 +240,18 @@ flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] +flat_load_dwordx3 v[5:7], 
v[1:2] nv +// CHECK: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] + flat_load_dwordx4 v[5:8], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] @@ -192,6 +276,18 @@ flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] +flat_load_dwordx4 v[5:8], v[1:2] nv +// CHECK: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] + flat_store_byte v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] @@ -216,6 +312,18 @@ flat_store_byte v[1:2], v2 offset:4095 glc flat_store_byte v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] +flat_store_byte v[1:2], v2 nv +// CHECK: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] + flat_store_byte_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] @@ -240,6 +348,18 @@ flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] +flat_store_byte_d16_hi v[1:2], v2 
nv +// CHECK: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte_d16_hi v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] + flat_store_short v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] @@ -264,6 +384,18 @@ flat_store_short v[1:2], v2 offset:4095 glc flat_store_short v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] +flat_store_short v[1:2], v2 nv +// CHECK: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] + flat_store_short_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] @@ -288,6 +420,18 @@ flat_store_short_d16_hi v[1:2], v2 offset:4095 glc flat_store_short_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] +flat_store_short_d16_hi v[1:2], v2 nv +// CHECK: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short_d16_hi v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] + flat_store_dword v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] @@ -312,6 +456,18 @@ flat_store_dword v[1:2], v2 offset:4095 glc flat_store_dword v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] +flat_store_dword v[1:2], v2 nv +// 
CHECK: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dword v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dword v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dword v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] + flat_store_dwordx2 v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] @@ -336,6 +492,18 @@ flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] +flat_store_dwordx2 v[1:2], v[2:3] nv +// CHECK: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv +// CHECK: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] + flat_store_dwordx3 v[1:2], v[2:4] offset:4095 // CHECK: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] @@ -360,6 +528,18 @@ flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc // CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] +flat_store_dwordx3 v[1:2], v[2:4] nv +// CHECK: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv +// CHECK: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] + flat_store_dwordx4 v[1:2], v[2:5] offset:4095 // CHECK: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] @@ -384,6 +564,18 @@ flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc // CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] 
+flat_store_dwordx4 v[1:2], v[2:5] nv +// CHECK: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv +// CHECK: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] + flat_load_ubyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] @@ -408,6 +600,18 @@ flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] +flat_load_ubyte_d16 v5, v[1:2] nv +// CHECK: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16 v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] + flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] @@ -432,6 +636,18 @@ flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] +flat_load_ubyte_d16_hi v5, v[1:2] nv +// CHECK: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] + flat_load_sbyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] @@ -456,6 +672,18 @@ flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc // CHECK: 
[0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] +flat_load_sbyte_d16 v5, v[1:2] nv +// CHECK: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16 v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] + flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] @@ -480,6 +708,18 @@ flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] +flat_load_sbyte_d16_hi v5, v[1:2] nv +// CHECK: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] + flat_load_short_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] @@ -504,6 +744,18 @@ flat_load_short_d16 v5, v[1:2] offset:4095 glc flat_load_short_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] +flat_load_short_d16 v5, v[1:2] nv +// CHECK: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16 v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16 v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16 v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] + flat_load_short_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] @@ -528,6 +780,18 @@ flat_load_short_d16_hi v5, v[1:2] offset:4095 glc flat_load_short_d16_hi v5, v[1:2] 
offset:4095 slc // CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] +flat_load_short_d16_hi v5, v[1:2] nv +// CHECK: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16_hi v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] + flat_atomic_swap v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] @@ -552,6 +816,18 @@ flat_atomic_swap v0, v[1:2], v2 offset:4095 glc flat_atomic_swap v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] +flat_atomic_swap v[1:2], v2 nv +// CHECK: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_swap v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_swap v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] + flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] @@ -576,6 +852,18 @@ flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] +flat_atomic_cmpswap v[1:2], v[2:3] nv +// CHECK: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv +// CHECK: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] + flat_atomic_add v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] @@ -600,6 +888,18 @@ flat_atomic_add v0, v[1:2], v2 offset:4095 glc 
flat_atomic_add v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] +flat_atomic_add v[1:2], v2 nv +// CHECK: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_add v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_add v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] + flat_atomic_sub v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] @@ -1197,6 +1497,18 @@ global_load_ubyte v5, v1, s[4:5] offset:-1 glc global_load_ubyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x04,0x05] +global_load_ubyte v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] + global_load_sbyte v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x04,0x05] @@ -1242,6 +1554,18 @@ global_load_sbyte v5, v1, s[4:5] offset:-1 glc global_load_sbyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x04,0x05] +global_load_sbyte v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] + global_load_ushort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x04,0x05] @@ -1287,6 +1611,18 @@ global_load_ushort v5, v1, s[4:5] offset:-1 glc 
global_load_ushort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x04,0x05] +global_load_ushort v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] + +global_load_ushort v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] + +global_load_ushort v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] + +global_load_ushort v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] + global_load_sshort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x04,0x05] @@ -1332,6 +1668,18 @@ global_load_sshort v5, v1, s[4:5] offset:-1 glc global_load_sshort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x04,0x05] +global_load_sshort v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sshort v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sshort v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] + +global_load_sshort v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] + global_load_dword v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x04,0x05] @@ -1377,6 +1725,18 @@ global_load_dword v5, v1, s[4:5] offset:-1 glc global_load_dword v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x04,0x05] +global_load_dword v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] + +global_load_dword v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] + +global_load_dword v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] + +global_load_dword v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] + global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x04,0x05] @@ -1422,6 +1782,18 @@ 
global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x04,0x05] +global_load_dwordx2 v[5:6], v1, s[4:5] nv +// CHECK: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] + +global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] + +global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] + +global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] + global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x04,0x05] @@ -1467,6 +1839,15 @@ global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x04,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] nv +// CHECK: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] + global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x04,0x05] @@ -1512,6 +1893,15 @@ global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x04,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] nv +// CHECK: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] + 
global_store_byte v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x06,0x00] @@ -1557,6 +1947,18 @@ global_store_byte v1, v2, s[6:7] offset:-1 glc global_store_byte v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x06,0x00] +global_store_byte v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] + global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x06,0x00] @@ -1602,6 +2004,18 @@ global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x06,0x00] +global_store_byte_d16_hi v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] + global_store_short v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x06,0x00] @@ -1647,6 +2061,18 @@ global_store_short v1, v2, s[6:7] offset:-1 glc global_store_short v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x06,0x00] +global_store_short v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] + +global_store_short v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] + +global_store_short v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] + +global_store_short v1, v2, s[6:7] 
offset:-1 slc nv +// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] + global_store_short_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x06,0x00] @@ -1692,6 +2118,18 @@ global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x06,0x00] +global_store_short_d16_hi v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] + +global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] + +global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] + +global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] + global_store_dword v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x06,0x00] @@ -1737,6 +2175,18 @@ global_store_dword v1, v2, s[6:7] offset:-1 glc global_store_dword v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x06,0x00] +global_store_dword v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] + +global_store_dword v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] + +global_store_dword v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] + +global_store_dword v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] + global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x06,0x00] @@ -1782,6 +2232,18 @@ global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x06,0x00] +global_store_dwordx2 v1, v[2:3], s[6:7] nv +// CHECK: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx2 v1, 
v[2:3], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] + global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x06,0x00] @@ -1827,6 +2289,18 @@ global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x06,0x00] +global_store_dwordx3 v1, v[2:4], s[6:7] nv +// CHECK: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] + global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x06,0x00] @@ -1872,6 +2346,18 @@ global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x06,0x00] +global_store_dwordx4 v1, v[2:5], s[6:7] nv +// CHECK: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] + global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x04,0x05] @@ -1917,6 +2403,18 @@ global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x04,0x05] +global_load_ubyte_d16 v5, v1, s[4:5] nv +// CHECK: 
[0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] + global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x04,0x05] @@ -1962,6 +2460,18 @@ global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x04,0x05] +global_load_ubyte_d16_hi v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] + global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x04,0x05] @@ -2007,6 +2517,18 @@ global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x04,0x05] +global_load_sbyte_d16 v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] + global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x04,0x05] @@ -2052,6 +2574,18 @@ global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16_hi v5, v1, 
s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x04,0x05] +global_load_sbyte_d16_hi v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] + global_load_short_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x04,0x05] @@ -2097,6 +2631,18 @@ global_load_short_d16 v5, v1, s[4:5] offset:-1 glc global_load_short_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x04,0x05] +global_load_short_d16 v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16 v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] + global_load_short_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x04,0x05] @@ -2142,6 +2688,18 @@ global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x04,0x05] +global_load_short_d16_hi v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] + global_atomic_swap v1, v2, s[6:7] offset:-1 // CHECK: 
[0xff,0x9f,0x00,0xdd,0x01,0x02,0x06,0x00] @@ -2187,6 +2745,18 @@ global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc global_atomic_swap v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x06,0x00] +global_atomic_swap v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_swap v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] + global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x06,0x00] @@ -2232,6 +2802,18 @@ global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x06,0x00] +global_atomic_cmpswap v1, v[2:3], s[6:7] nv +// CHECK: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] + global_atomic_add v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x06,0x00] @@ -2277,6 +2859,18 @@ global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc global_atomic_add v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x06,0x00] +global_atomic_add v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_add v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_add v1, v2, s[6:7] offset:-1 slc nv +// CHECK: 
[0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] + global_atomic_sub v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x06,0x00] @@ -3357,6 +3951,18 @@ scratch_load_ubyte v5, off, s2 offset:-1 glc scratch_load_ubyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ubyte v5, off, s2 nv +// CHECK: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sbyte v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] @@ -3402,6 +4008,18 @@ scratch_load_sbyte v5, off, s2 offset:-1 glc scratch_load_sbyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sbyte v5, off, s2 nv +// CHECK: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] + scratch_load_ushort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] @@ -3447,6 +4065,18 @@ scratch_load_ushort v5, off, s2 offset:-1 glc scratch_load_ushort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ushort v5, off, s2 nv +// CHECK: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ushort v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ushort v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ushort v5, off, s2 offset:-1 slc nv +// CHECK: 
[0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sshort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] @@ -3492,6 +4122,18 @@ scratch_load_sshort v5, off, s2 offset:-1 glc scratch_load_sshort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sshort v5, off, s2 nv +// CHECK: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sshort v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sshort v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sshort v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dword v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] @@ -3537,6 +4179,18 @@ scratch_load_dword v5, off, s2 offset:-1 glc scratch_load_dword v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dword v5, off, s2 nv +// CHECK: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dword v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dword v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dword v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dwordx2 v[5:6], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] @@ -3582,6 +4236,18 @@ scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dwordx2 v[5:6], off, s2 nv +// CHECK: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx2 v[5:6], off, s2 offset:-1 
slc nv +// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dwordx3 v[5:7], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] @@ -3627,6 +4293,18 @@ scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dwordx3 v[5:7], off, s2 nv +// CHECK: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dwordx4 v[5:8], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] @@ -3672,6 +4350,18 @@ scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dwordx4 v[5:8], off, s2 nv +// CHECK: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] + scratch_store_byte off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] @@ -3717,6 +4407,18 @@ scratch_store_byte off, v2, s3 offset:-1 glc scratch_store_byte off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] +scratch_store_byte off, v2, s3 nv +// CHECK: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte off, v2, s3 offset:-1 glc nv +// CHECK: 
[0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] + scratch_store_byte_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] @@ -3762,6 +4464,18 @@ scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] +scratch_store_byte_d16_hi off, v2, s3 nv +// CHECK: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] + scratch_store_short off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] @@ -3807,6 +4521,18 @@ scratch_store_short off, v2, s3 offset:-1 glc scratch_store_short off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] +scratch_store_short off, v2, s3 nv +// CHECK: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] + scratch_store_short_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] @@ -3852,6 +4578,18 @@ scratch_store_short_d16_hi off, v2, s3 offset:-1 glc scratch_store_short_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] +scratch_store_short_d16_hi off, v2, s3 nv +// CHECK: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short_d16_hi off, v2, s3 offset:-1 nv +// CHECK: 
[0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dword off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] @@ -3897,6 +4635,18 @@ scratch_store_dword off, v2, s3 offset:-1 glc scratch_store_dword off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dword off, v2, s3 nv +// CHECK: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dword off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dword off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dword off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dwordx2 off, v[2:3], s3 offset:-1 // CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] @@ -3942,6 +4692,18 @@ scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dwordx2 off, v[2:3], s3 nv +// CHECK: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dwordx3 off, v[2:4], s3 offset:-1 // CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] @@ -3987,6 +4749,18 @@ scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dwordx3 off, v[2:4], s3 nv +// CHECK: 
[0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dwordx4 off, v[2:5], s3 offset:-1 // CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] @@ -4032,6 +4806,18 @@ scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dwordx4 off, v[2:5], s3 nv +// CHECK: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] + scratch_load_ubyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] @@ -4077,6 +4863,18 @@ scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ubyte_d16 v5, off, s2 nv +// CHECK: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] + scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] @@ -4122,6 +4920,18 @@ scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: 
[0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ubyte_d16_hi v5, off, s2 nv +// CHECK: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sbyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] @@ -4167,6 +4977,18 @@ scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sbyte_d16 v5, off, s2 nv +// CHECK: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] @@ -4212,6 +5034,18 @@ scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sbyte_d16_hi v5, off, s2 nv +// CHECK: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] + scratch_load_short_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] @@ -4254,6 +5088,18 @@ scratch_load_short_d16 
v5, off, s2 offset:-4096 scratch_load_short_d16 v5, off, s2 offset:-1 glc // CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x02,0x05] +scratch_load_short_d16 v5, off, s2 nv +// CHECK: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16 v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16 v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16 v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] + scratch_load_short_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] @@ -4302,6 +5148,18 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 glc scratch_load_short_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] +scratch_load_short_d16_hi v5, off, s2 nv +// CHECK: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16_hi v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] + global_load_dword v[2:3], off lds // CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt index 0ee659e207c91..4c06585a4c2eb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt @@ -21,6 +21,18 @@ # CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ubyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] 
+0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05 @@ -42,6 +54,18 @@ # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sbyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_ushort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05 @@ -63,6 +87,18 @@ # CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ushort v5, v[1:2] nv ; encoding: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ushort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ushort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05 + +# 
CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sshort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05 @@ -84,6 +120,18 @@ # CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sshort v5, v[1:2] nv ; encoding: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sshort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sshort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dword v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05 @@ -105,6 +153,18 @@ # CHECK: flat_load_dword v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dword v5, v[1:2] nv ; encoding: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dword v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dword v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dwordx2 v[5:6], v[1:2] 
offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05 @@ -126,6 +186,18 @@ # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] nv ; encoding: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05 @@ -147,6 +219,18 @@ # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] nv ; encoding: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05 @@ -168,6 +252,18 @@ # 
CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] nv ; encoding: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_store_byte v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00 @@ -189,6 +285,18 @@ # CHECK: flat_store_byte v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_byte v[1:2], v2 nv ; encoding: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00 @@ -210,6 +318,18 @@ # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00 +# CHECK: 
flat_store_byte_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_short v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00 @@ -231,6 +351,18 @@ # CHECK: flat_store_short v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_short v[1:2], v2 nv ; encoding: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00 @@ -252,6 +384,18 @@ # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_short_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: 
flat_store_short_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dword v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00 @@ -273,6 +417,18 @@ # CHECK: flat_store_dword v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dword v[1:2], v2 nv ; encoding: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dword v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dword v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00 @@ -294,6 +450,18 @@ # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: 
flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00 @@ -315,6 +483,18 @@ # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] nv ; encoding: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv ; encoding: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv ; encoding: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00 @@ -336,6 +516,18 @@ # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] nv ; encoding: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv ; encoding: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv ; encoding: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] 
+0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05 @@ -357,6 +549,18 @@ # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ubyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05 @@ -378,6 +582,18 @@ # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: 
[0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05 @@ -399,6 +615,18 @@ # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sbyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05 @@ -420,6 +648,18 @@ # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 
; encoding: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05 @@ -441,6 +681,18 @@ # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_short_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05 @@ -462,6 +714,18 @@ # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_short_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00 @@ -483,6 +747,18 @@ # CHECK: flat_atomic_swap 
v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00 +# CHECK: flat_atomic_swap v[1:2], v2 nv ; encoding: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] +0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_swap v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] +0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00 + # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00 @@ -504,6 +780,18 @@ # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00 +# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] +0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] +0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00 + # CHECK: flat_atomic_add v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00 @@ -525,6 +813,18 @@ # CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00 +# CHECK: 
flat_atomic_add v[1:2], v2 nv ; encoding: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] +0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_add v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] +0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00 + # CHECK: flat_atomic_sub v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00 @@ -1017,6 +1317,18 @@ # CHECK: global_load_ubyte v5, v[1:2], off ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ubyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sbyte v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05 @@ -1026,6 +1338,18 @@ # CHECK: global_load_sbyte v5, v[1:2], off ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sbyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 
nv ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_ushort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05 @@ -1035,6 +1359,18 @@ # CHECK: global_load_ushort v5, v[1:2], off ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ushort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sshort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05 @@ -1044,6 +1380,18 @@ # CHECK: global_load_sshort v5, v[1:2], off ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sshort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 
glc nv ; encoding: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dword v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05 @@ -1053,6 +1401,18 @@ # CHECK: global_load_dword v5, v[1:2], off ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dword v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dwordx2 v[5:6], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05 @@ -1062,6 +1422,18 @@ # CHECK: global_load_dwordx2 v[5:6], v[1:2], off ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] nv ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx2 
v[5:6], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dwordx3 v[5:7], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05 @@ -1071,6 +1443,18 @@ # CHECK: global_load_dwordx3 v[5:7], v[1:2], off ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] nv ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dwordx4 v[5:8], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05 @@ -1080,6 +1464,18 @@ # CHECK: global_load_dwordx4 v[5:8], v[1:2], off ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] nv ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] 
+0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_store_byte v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00 @@ -1089,6 +1485,18 @@ # CHECK: global_store_byte v[1:2], v2, off ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_byte v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_byte_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00 @@ -1098,6 +1506,18 @@ # CHECK: global_store_byte_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_short v[1:2], v2, off offset:-1 ; encoding: 
[0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00 @@ -1107,6 +1527,18 @@ # CHECK: global_store_short v[1:2], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_short v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_short_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00 @@ -1116,6 +1548,18 @@ # CHECK: global_store_short_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dword v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00 @@ -1125,6 +1569,18 @@ # 
CHECK: global_store_dword v[1:2], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dword v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dwordx2 v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00 @@ -1134,6 +1590,18 @@ # CHECK: global_store_dwordx2 v[1:2], v[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dwordx3 v[1:2], v[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00 @@ -1143,6 +1611,18 @@ # CHECK: global_store_dwordx3 v[1:2], v[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00] 
0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] nv ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dwordx4 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00 @@ -1152,6 +1632,18 @@ # CHECK: global_store_dwordx4 v[1:2], v[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] nv ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_load_ubyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05 @@ -1161,6 +1653,18 @@ # CHECK: global_load_ubyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] 
nv ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05 @@ -1170,6 +1674,18 @@ # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sbyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05 @@ -1179,6 +1695,18 @@ # CHECK: global_load_sbyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05 + 
+# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05 @@ -1188,6 +1716,18 @@ # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_short_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05 @@ -1197,6 +1737,18 @@ # CHECK: global_load_short_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_short_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: 
[0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_short_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05 @@ -1206,6 +1758,18 @@ # CHECK: global_load_short_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_atomic_swap v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00 @@ -1215,6 +1779,18 @@ # CHECK: global_atomic_swap v[1:2], v2, off ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00 +# CHECK: global_atomic_swap v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] +0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: 
global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00 + # CHECK: global_atomic_cmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00 @@ -1236,6 +1812,18 @@ # CHECK: global_atomic_cmpswap v1, v[2:3], v[4:5], off glc ; encoding: [0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01] 0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01 +# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] +0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00 + # CHECK: global_atomic_add v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00 @@ -1245,6 +1833,18 @@ # CHECK: global_atomic_add v[1:2], v2, off ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00 +# CHECK: global_atomic_add v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] +0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: 
[0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00 + # CHECK: global_atomic_sub v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00 @@ -1503,6 +2103,18 @@ # CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ubyte v5, off, s2 nv ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05 @@ -1542,6 +2154,18 @@ # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sbyte v5, off, s2 nv ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc nv ; encoding: 
[0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_ushort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05 @@ -1581,6 +2205,18 @@ # CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ushort v5, off, s2 nv ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ushort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ushort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sshort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05 @@ -1620,6 +2256,18 @@ # CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sshort v5, off, s2 nv ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sshort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sshort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dword v5, off, s2 offset:-1 ; encoding: 
[0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05 @@ -1659,6 +2307,18 @@ # CHECK: scratch_load_dword v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dword v5, off, s2 nv ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dword v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dword v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05 @@ -1698,6 +2358,18 @@ # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 nv ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05 @@ -1737,6 +2409,18 @@ # CHECK: 
scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 nv ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05 @@ -1776,6 +2460,18 @@ # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 nv ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_store_byte off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00 @@ -1815,6 +2511,18 @@ # CHECK: scratch_store_byte off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] 
0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_byte off, v2, s3 nv ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00 @@ -1854,6 +2562,18 @@ # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_byte_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_short off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00 @@ -1893,6 +2613,18 @@ # CHECK: scratch_store_short off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_short off, v2, s3 nv ; encoding: 
[0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00 @@ -1932,6 +2664,18 @@ # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_short_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dword off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00 @@ -1971,6 +2715,18 @@ # CHECK: scratch_store_dword off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dword off, v2, s3 nv ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: 
scratch_store_dword off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dword off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00 @@ -2010,6 +2766,18 @@ # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 nv ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00 @@ -2049,6 +2817,18 @@ # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 nv ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv ; encoding: 
[0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00 @@ -2088,6 +2868,18 @@ # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 nv ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05 @@ -2127,6 +2919,18 @@ # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ubyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05 + +# 
CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05 @@ -2166,6 +2970,18 @@ # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05 @@ -2205,6 +3021,18 @@ # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sbyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: 
[0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05 @@ -2244,6 +3072,18 @@ # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05 @@ -2283,6 +3123,18 @@ # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_short_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: 
scratch_load_short_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05 @@ -2322,6 +3174,18 @@ # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_short_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05 + # CHECK: global_load_dword v[2:3], off lds ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] 0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00 From 6c4f9688082361a5c5d57aa1e6d368dfc4aeea75 Mon Sep 17 00:00:00 2001 From: Hannes Braun Date: Wed, 5 Nov 2025 20:55:32 +0100 Subject: [PATCH 53/61] [clang-format] Fix brace wrapping for Java records (#164711) The brace wrapping for Java records should now behave similar to classes. Before, opening braces for Java records were always placed in the same line as the record definition. 
--- clang/lib/Format/UnwrappedLineFormatter.cpp | 8 +++++--- clang/lib/Format/UnwrappedLineParser.cpp | 12 ++++++++---- clang/unittests/Format/FormatTestJava.cpp | 13 +++++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index ac9c81d4416c9..d31d656a63fc5 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -285,7 +285,8 @@ class LineJoiner { if (Tok && Tok->is(tok::kw_typedef)) Tok = Tok->getNextNonComment(); if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, - tok::kw_extern, Keywords.kw_interface)) { + tok::kw_extern, Keywords.kw_interface, + Keywords.kw_record)) { return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock ? tryMergeSimpleBlock(I, E, Limit) : 0; @@ -498,7 +499,8 @@ class LineJoiner { ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else if (TheLine->Last->is(TT_CompoundRequirementLBrace)) { ShouldMerge = Style.AllowShortCompoundRequirementOnASingleLine; - } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace)) { + } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace, + TT_RecordLBrace)) { // NOTE: We use AfterClass (whereas AfterStruct exists) for both classes // and structs, but it seems that wrapping is still handled correctly // elsewhere. @@ -507,7 +509,7 @@ class LineJoiner { !Style.BraceWrapping.SplitEmptyRecord); } else if (TheLine->InPPDirective || TheLine->First->isNoneOf(tok::kw_class, tok::kw_enum, - tok::kw_struct)) { + tok::kw_struct, Keywords.kw_record)) { // Try to merge a block with left brace unwrapped that wasn't yet // covered. 
ShouldMerge = !Style.BraceWrapping.AfterFunction || diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 5e2584edac8f4..8b7dd02d548af 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -948,7 +948,11 @@ static bool isIIFE(const UnwrappedLine &Line, } static bool ShouldBreakBeforeBrace(const FormatStyle &Style, - const FormatToken &InitialToken) { + const FormatToken &InitialToken, + const bool IsJavaRecord) { + if (IsJavaRecord) + return Style.BraceWrapping.AfterClass; + tok::TokenKind Kind = InitialToken.Tok.getKind(); if (InitialToken.is(TT_NamespaceMacro)) Kind = tok::kw_namespace; @@ -3200,7 +3204,7 @@ void UnwrappedLineParser::parseNamespace() { if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_NamespaceLBrace); - if (ShouldBreakBeforeBrace(Style, InitialToken)) + if (ShouldBreakBeforeBrace(Style, InitialToken, /*IsJavaRecord=*/false)) addUnwrappedLine(); unsigned AddLevels = @@ -3865,7 +3869,7 @@ bool UnwrappedLineParser::parseEnum() { } if (!Style.AllowShortEnumsOnASingleLine && - ShouldBreakBeforeBrace(Style, InitialToken)) { + ShouldBreakBeforeBrace(Style, InitialToken, /*IsJavaRecord=*/false)) { addUnwrappedLine(); } // Parse enum body. @@ -4160,7 +4164,7 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) { if (ParseAsExpr) { parseChildBlock(); } else { - if (ShouldBreakBeforeBrace(Style, InitialToken)) + if (ShouldBreakBeforeBrace(Style, InitialToken, IsJavaRecord)) addUnwrappedLine(); unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 1416614bae29a..3cc97e2dc0b2e 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -848,6 +848,19 @@ TEST_F(FormatTestJava, TextBlock) { " Pat Q. 
Smith"); } +TEST_F(FormatTestJava, BreakAfterRecord) { + auto Style = getLLVMStyle(FormatStyle::LK_Java); + Style.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_Never; + Style.BreakBeforeBraces = FormatStyle::BS_Custom; + Style.BraceWrapping.AfterClass = true; + Style.BraceWrapping.SplitEmptyRecord = true; + + verifyFormat("public record Foo(int i)\n" + "{\n" + "}", + "public record Foo(int i) {}", Style); +} + } // namespace } // namespace test } // namespace format From 0469ff0a212d7f3dea464c52e19d56e22b5af858 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 5 Nov 2025 11:55:59 -0800 Subject: [PATCH 54/61] TableGen: Split RuntimeLibcallsEmitter into separate utility header (#166583) This information will be needed in more emitters, so start factoring it to be more reusable. --- llvm/utils/TableGen/Basic/CMakeLists.txt | 1 + llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp | 93 ++++++ llvm/utils/TableGen/Basic/RuntimeLibcalls.h | 189 ++++++++++++ .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 275 ++---------------- 4 files changed, 308 insertions(+), 250 deletions(-) create mode 100644 llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp create mode 100644 llvm/utils/TableGen/Basic/RuntimeLibcalls.h diff --git a/llvm/utils/TableGen/Basic/CMakeLists.txt b/llvm/utils/TableGen/Basic/CMakeLists.txt index b4a66ecce6440..2030e9add7f30 100644 --- a/llvm/utils/TableGen/Basic/CMakeLists.txt +++ b/llvm/utils/TableGen/Basic/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMTableGenBasic OBJECT EXCLUDE_FROM_ALL DISABLE_LLVM_LINK_LLV IntrinsicEmitter.cpp RISCVTargetDefEmitter.cpp RuntimeLibcallsEmitter.cpp + RuntimeLibcalls.cpp SDNodeProperties.cpp TableGen.cpp TargetFeaturesEmitter.cpp diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp new file mode 100644 index 0000000000000..1e609a2a8880b --- /dev/null +++ b/llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp @@ -0,0 +1,93 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RuntimeLibcalls.h" +#include "llvm/TableGen/Error.h" + +using namespace llvm; + +RuntimeLibcalls::RuntimeLibcalls(const RecordKeeper &Records) { + ArrayRef AllRuntimeLibcalls = + Records.getAllDerivedDefinitions("RuntimeLibcall"); + + RuntimeLibcallDefList.reserve(AllRuntimeLibcalls.size()); + + size_t CallTypeEnumVal = 0; + for (const Record *RuntimeLibcallDef : AllRuntimeLibcalls) { + RuntimeLibcallDefList.emplace_back(RuntimeLibcallDef, CallTypeEnumVal++); + Def2RuntimeLibcall[RuntimeLibcallDef] = &RuntimeLibcallDefList.back(); + } + + for (RuntimeLibcall &LibCall : RuntimeLibcallDefList) + Def2RuntimeLibcall[LibCall.getDef()] = &LibCall; + + ArrayRef AllRuntimeLibcallImplsRaw = + Records.getAllDerivedDefinitions("RuntimeLibcallImpl"); + + SmallVector AllRuntimeLibcallImpls( + AllRuntimeLibcallImplsRaw); + + // Sort by libcall impl name and secondarily by the enum name. 
+ sort(AllRuntimeLibcallImpls, [](const Record *A, const Record *B) { + return std::pair(A->getValueAsString("LibCallFuncName"), A->getName()) < + std::pair(B->getValueAsString("LibCallFuncName"), B->getName()); + }); + + RuntimeLibcallImplDefList.reserve(AllRuntimeLibcallImpls.size()); + + size_t LibCallImplEnumVal = 1; + for (const Record *LibCallImplDef : AllRuntimeLibcallImpls) { + RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, + LibCallImplEnumVal++); + + const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); + Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; + + if (LibCallImpl.isDefault()) { + const RuntimeLibcall *Provides = LibCallImpl.getProvides(); + if (!Provides) + PrintFatalError(LibCallImplDef->getLoc(), + "default implementations must provide a libcall"); + LibCallToDefaultImpl[Provides] = &LibCallImpl; + } + } +} + +void LibcallPredicateExpander::expand(SetTheory &ST, const Record *Def, + SetTheory::RecSet &Elts) { + assert(Def->isSubClassOf("LibcallImpls")); + + SetTheory::RecSet TmpElts; + + ST.evaluate(Def->getValueInit("MemberList"), TmpElts, Def->getLoc()); + + Elts.insert(TmpElts.begin(), TmpElts.end()); + + AvailabilityPredicate AP(Def->getValueAsDef("AvailabilityPredicate")); + const Record *CCClass = Def->getValueAsOptionalDef("CallingConv"); + + // This is assuming we aren't conditionally applying a calling convention to + // some subsets, and not another, but this doesn't appear to be used. 
+ + for (const Record *LibcallImplDef : TmpElts) { + const RuntimeLibcallImpl *LibcallImpl = + Libcalls.getRuntimeLibcallImpl(LibcallImplDef); + if (!AP.isAlwaysAvailable() || CCClass) { + auto [It, Inserted] = Func2Preds.insert({LibcallImpl, {{}, CCClass}}); + if (!Inserted) { + PrintError( + Def, + "combining nested libcall set predicates currently unhandled: '" + + LibcallImpl->getLibcallFuncName() + "'"); + } + + It->second.first.push_back(AP.getDef()); + It->second.second = CCClass; + } + } +} diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcalls.h b/llvm/utils/TableGen/Basic/RuntimeLibcalls.h new file mode 100644 index 0000000000000..6c9897602b2fa --- /dev/null +++ b/llvm/utils/TableGen/Basic/RuntimeLibcalls.h @@ -0,0 +1,189 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_COMMON_RUNTIMELIBCALLS_H +#define LLVM_UTILS_TABLEGEN_COMMON_RUNTIMELIBCALLS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" + +namespace llvm { + +class AvailabilityPredicate { + const Record *TheDef; + StringRef PredicateString; + +public: + AvailabilityPredicate(const Record *Def) : TheDef(Def) { + if (TheDef) + PredicateString = TheDef->getValueAsString("Cond"); + } + + const Record *getDef() const { return TheDef; } + + bool isAlwaysAvailable() const { return PredicateString.empty(); } + + void emitIf(raw_ostream &OS) const { + OS << "if (" << PredicateString << ") {\n"; + } + + void emitEndIf(raw_ostream &OS) const { OS << "}\n"; } + + void emitTableVariableNameSuffix(raw_ostream &OS) const { + if (TheDef) + OS << '_' << 
TheDef->getName(); + } +}; + +class RuntimeLibcalls; +class RuntimeLibcallImpl; + +/// Used to apply predicates to nested sets of libcalls. +struct LibcallPredicateExpander : SetTheory::Expander { + const RuntimeLibcalls &Libcalls; + DenseMap, const Record *>> &Func2Preds; + + LibcallPredicateExpander( + const RuntimeLibcalls &Libcalls, + DenseMap, const Record *>> + &Func2Preds) + : Libcalls(Libcalls), Func2Preds(Func2Preds) {} + + void expand(SetTheory &ST, const Record *Def, + SetTheory::RecSet &Elts) override; +}; + +class RuntimeLibcall { + const Record *TheDef = nullptr; + const size_t EnumVal; + +public: + RuntimeLibcall() = delete; + RuntimeLibcall(const Record *Def, size_t EnumVal) + : TheDef(Def), EnumVal(EnumVal) { + assert(Def); + } + + ~RuntimeLibcall() { assert(TheDef); } + + const Record *getDef() const { return TheDef; } + + StringRef getName() const { return TheDef->getName(); } + + size_t getEnumVal() const { return EnumVal; } + + void emitEnumEntry(raw_ostream &OS) const { + OS << "RTLIB::" << TheDef->getValueAsString("Name"); + } +}; + +class RuntimeLibcallImpl { + const Record *TheDef; + const RuntimeLibcall *Provides = nullptr; + const size_t EnumVal; + +public: + RuntimeLibcallImpl( + const Record *Def, + const DenseMap &ProvideMap, + size_t EnumVal) + : TheDef(Def), EnumVal(EnumVal) { + if (const Record *ProvidesDef = Def->getValueAsDef("Provides")) + Provides = ProvideMap.lookup(ProvidesDef); + } + + ~RuntimeLibcallImpl() = default; + + const Record *getDef() const { return TheDef; } + + StringRef getName() const { return TheDef->getName(); } + + size_t getEnumVal() const { return EnumVal; } + + const RuntimeLibcall *getProvides() const { return Provides; } + + StringRef getLibcallFuncName() const { + return TheDef->getValueAsString("LibCallFuncName"); + } + + const Record *getCallingConv() const { + return TheDef->getValueAsOptionalDef("CallingConv"); + } + + void emitQuotedLibcallFuncName(raw_ostream &OS) const { + OS << '\"' << 
getLibcallFuncName() << '\"'; + } + + bool isDefault() const { return TheDef->getValueAsBit("IsDefault"); } + + void emitEnumEntry(raw_ostream &OS) const { + OS << "RTLIB::impl_" << this->getName(); + } + + void emitSetImplCall(raw_ostream &OS) const { + OS << "setLibcallImpl("; + Provides->emitEnumEntry(OS); + OS << ", "; + emitEnumEntry(OS); + OS << "); // " << getLibcallFuncName() << '\n'; + } + + void emitTableEntry(raw_ostream &OS) const { + OS << '{'; + Provides->emitEnumEntry(OS); + OS << ", "; + emitEnumEntry(OS); + OS << "}, // " << getLibcallFuncName() << '\n'; + } + + void emitSetCallingConv(raw_ostream &OS) const {} +}; + +struct LibcallsWithCC { + std::vector LibcallImpls; + const Record *CallingConv = nullptr; +}; + +class RuntimeLibcalls { +private: + DenseMap Def2RuntimeLibcall; + DenseMap Def2RuntimeLibcallImpl; + + std::vector RuntimeLibcallDefList; + std::vector RuntimeLibcallImplDefList; + + DenseMap + LibCallToDefaultImpl; + +public: + RuntimeLibcalls(const RecordKeeper &Records); + + ArrayRef getRuntimeLibcallDefList() const { + return RuntimeLibcallDefList; + } + + ArrayRef getRuntimeLibcallImplDefList() const { + return RuntimeLibcallImplDefList; + } + + const RuntimeLibcall *getRuntimeLibcall(const Record *Def) const { + return Def2RuntimeLibcall.lookup(Def); + } + + const RuntimeLibcallImpl *getRuntimeLibcallImpl(const Record *Def) const { + return Def2RuntimeLibcallImpl.lookup(Def); + } +}; + +} // namespace llvm + +#endif // LLVM_UTILS_TABLEGEN_COMMON_RUNTIMELIBCALLS_H diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 001ca7b658d3c..7aca87a63d0a2 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -8,6 +8,8 @@ #define DEBUG_TYPE "runtime-libcall-emitter" +#include "RuntimeLibcalls.h" + #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" @@ 
-65,160 +67,12 @@ template <> struct DenseMapInfo { return LHS == RHS; } }; -} // namespace llvm - -namespace { - -class AvailabilityPredicate { - const Record *TheDef; - StringRef PredicateString; - -public: - AvailabilityPredicate(const Record *Def) : TheDef(Def) { - if (TheDef) - PredicateString = TheDef->getValueAsString("Cond"); - } - - const Record *getDef() const { return TheDef; } - - bool isAlwaysAvailable() const { return PredicateString.empty(); } - - void emitIf(raw_ostream &OS) const { - OS << "if (" << PredicateString << ") {\n"; - } - - void emitEndIf(raw_ostream &OS) const { OS << "}\n"; } - - void emitTableVariableNameSuffix(raw_ostream &OS) const { - if (TheDef) - OS << '_' << TheDef->getName(); - } -}; - -class RuntimeLibcallEmitter; -class RuntimeLibcallImpl; - -/// Used to apply predicates to nested sets of libcalls. -struct LibcallPredicateExpander : SetTheory::Expander { - const RuntimeLibcallEmitter &LibcallEmitter; - DenseMap, const Record *>> &Func2Preds; - - LibcallPredicateExpander( - const RuntimeLibcallEmitter &LibcallEmitter, - DenseMap, const Record *>> - &Func2Preds) - : LibcallEmitter(LibcallEmitter), Func2Preds(Func2Preds) {} - - void expand(SetTheory &ST, const Record *Def, - SetTheory::RecSet &Elts) override; -}; - -class RuntimeLibcall { - const Record *TheDef = nullptr; - const size_t EnumVal; - -public: - RuntimeLibcall() = delete; - RuntimeLibcall(const Record *Def, size_t EnumVal) - : TheDef(Def), EnumVal(EnumVal) { - assert(Def); - } - - ~RuntimeLibcall() { assert(TheDef); } - - const Record *getDef() const { return TheDef; } - - StringRef getName() const { return TheDef->getName(); } - - size_t getEnumVal() const { return EnumVal; } - - void emitEnumEntry(raw_ostream &OS) const { - OS << "RTLIB::" << TheDef->getValueAsString("Name"); - } -}; - -class RuntimeLibcallImpl { - const Record *TheDef; - const RuntimeLibcall *Provides = nullptr; - const size_t EnumVal; - -public: - RuntimeLibcallImpl( - const Record *Def, - const 
DenseMap &ProvideMap, - size_t EnumVal) - : TheDef(Def), EnumVal(EnumVal) { - if (const Record *ProvidesDef = Def->getValueAsDef("Provides")) - Provides = ProvideMap.lookup(ProvidesDef); - } - - ~RuntimeLibcallImpl() = default; - - const Record *getDef() const { return TheDef; } - - StringRef getName() const { return TheDef->getName(); } - - size_t getEnumVal() const { return EnumVal; } - - const RuntimeLibcall *getProvides() const { return Provides; } - - StringRef getLibcallFuncName() const { - return TheDef->getValueAsString("LibCallFuncName"); - } - - const Record *getCallingConv() const { - return TheDef->getValueAsOptionalDef("CallingConv"); - } - - void emitQuotedLibcallFuncName(raw_ostream &OS) const { - OS << '\"' << getLibcallFuncName() << '\"'; - } - - bool isDefault() const { return TheDef->getValueAsBit("IsDefault"); } - - void emitEnumEntry(raw_ostream &OS) const { - OS << "RTLIB::impl_" << this->getName(); - } - - void emitSetImplCall(raw_ostream &OS) const { - OS << "setLibcallImpl("; - Provides->emitEnumEntry(OS); - OS << ", "; - emitEnumEntry(OS); - OS << "); // " << getLibcallFuncName() << '\n'; - } - - void emitTableEntry(raw_ostream &OS) const { - OS << '{'; - Provides->emitEnumEntry(OS); - OS << ", "; - emitEnumEntry(OS); - OS << "}, // " << getLibcallFuncName() << '\n'; - } - - void emitSetCallingConv(raw_ostream &OS) const {} -}; - -struct LibcallsWithCC { - std::vector LibcallImpls; - const Record *CallingConv = nullptr; -}; class RuntimeLibcallEmitter { private: const RecordKeeper &Records; - DenseMap Def2RuntimeLibcall; - DenseMap Def2RuntimeLibcallImpl; - - std::vector RuntimeLibcallDefList; - std::vector RuntimeLibcallImplDefList; - - DenseMap - LibCallToDefaultImpl; + RuntimeLibcalls Libcalls; -private: void emitGetRuntimeLibcallEnum(raw_ostream &OS) const; void emitNameMatchHashTable(raw_ostream &OS, @@ -229,61 +83,7 @@ class RuntimeLibcallEmitter { void emitSystemRuntimeLibrarySetCalls(raw_ostream &OS) const; public: - 
RuntimeLibcallEmitter(const RecordKeeper &R) : Records(R) { - - ArrayRef AllRuntimeLibcalls = - Records.getAllDerivedDefinitions("RuntimeLibcall"); - - RuntimeLibcallDefList.reserve(AllRuntimeLibcalls.size()); - - size_t CallTypeEnumVal = 0; - for (const Record *RuntimeLibcallDef : AllRuntimeLibcalls) { - RuntimeLibcallDefList.emplace_back(RuntimeLibcallDef, CallTypeEnumVal++); - Def2RuntimeLibcall[RuntimeLibcallDef] = &RuntimeLibcallDefList.back(); - } - - for (RuntimeLibcall &LibCall : RuntimeLibcallDefList) - Def2RuntimeLibcall[LibCall.getDef()] = &LibCall; - - ArrayRef AllRuntimeLibcallImplsRaw = - Records.getAllDerivedDefinitions("RuntimeLibcallImpl"); - - SmallVector AllRuntimeLibcallImpls( - AllRuntimeLibcallImplsRaw); - - // Sort by libcall impl name and secondarily by the enum name. - sort(AllRuntimeLibcallImpls, [](const Record *A, const Record *B) { - return std::pair(A->getValueAsString("LibCallFuncName"), A->getName()) < - std::pair(B->getValueAsString("LibCallFuncName"), B->getName()); - }); - - RuntimeLibcallImplDefList.reserve(AllRuntimeLibcallImpls.size()); - - size_t LibCallImplEnumVal = 1; - for (const Record *LibCallImplDef : AllRuntimeLibcallImpls) { - RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, - LibCallImplEnumVal++); - - const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); - Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; - - if (LibCallImpl.isDefault()) { - const RuntimeLibcall *Provides = LibCallImpl.getProvides(); - if (!Provides) - PrintFatalError(LibCallImplDef->getLoc(), - "default implementations must provide a libcall"); - LibCallToDefaultImpl[Provides] = &LibCallImpl; - } - } - } - - const RuntimeLibcall *getRuntimeLibcall(const Record *Def) const { - return Def2RuntimeLibcall.lookup(Def); - } - - const RuntimeLibcallImpl *getRuntimeLibcallImpl(const Record *Def) const { - return Def2RuntimeLibcallImpl.lookup(Def); - } + RuntimeLibcallEmitter(const RecordKeeper &R) : 
Records(R), Libcalls(R) {} void run(raw_ostream &OS); }; @@ -297,24 +97,25 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { "namespace RTLIB {\n" "enum Libcall : unsigned short {\n"; - for (const RuntimeLibcall &LibCall : RuntimeLibcallDefList) { + for (const RuntimeLibcall &LibCall : Libcalls.getRuntimeLibcallDefList()) { StringRef Name = LibCall.getName(); OS << " " << Name << " = " << LibCall.getEnumVal() << ",\n"; } - OS << " UNKNOWN_LIBCALL = " << RuntimeLibcallDefList.size() + OS << " UNKNOWN_LIBCALL = " << Libcalls.getRuntimeLibcallDefList().size() << "\n};\n\n" "enum LibcallImpl : unsigned short {\n" " Unsupported = 0,\n"; - for (const RuntimeLibcallImpl &LibCall : RuntimeLibcallImplDefList) { + for (const RuntimeLibcallImpl &LibCall : + Libcalls.getRuntimeLibcallImplDefList()) { OS << " impl_" << LibCall.getName() << " = " << LibCall.getEnumVal() << ", // " << LibCall.getLibcallFuncName() << '\n'; } OS << "};\n" << "constexpr size_t NumLibcallImpls = " - << RuntimeLibcallImplDefList.size() + 1 + << Libcalls.getRuntimeLibcallImplDefList().size() + 1 << ";\n" "} // End namespace RTLIB\n" "} // End namespace llvm\n"; @@ -394,6 +195,8 @@ constructPerfectHashTable(ArrayRef Keywords, /// Generate hash table based lookup by name. 
void RuntimeLibcallEmitter::emitNameMatchHashTable( raw_ostream &OS, StringToOffsetTable &OffsetTable) const { + ArrayRef RuntimeLibcallImplDefList = + Libcalls.getRuntimeLibcallImplDefList(); std::vector Hashes(RuntimeLibcallImplDefList.size()); std::vector TableValues(RuntimeLibcallImplDefList.size()); DenseSet SeenFuncNames; @@ -495,7 +298,8 @@ void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallNames( { IfDefEmitter IfDef(OS, "GET_INIT_RUNTIME_LIBCALL_NAMES"); - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) Table.GetOrAddStringOffset(LibCallImpl.getLibcallFuncName()); Table.EmitStringTableDef(OS, "RuntimeLibcallImplNameTable"); @@ -505,7 +309,8 @@ const uint16_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameOffsetTable[] = { OS << formatv(" {}, // {}\n", Table.GetStringOffset(""), ""); // Unsupported entry - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) { StringRef ImplName = LibCallImpl.getLibcallFuncName(); OS << formatv(" {}, // {}\n", Table.GetStringOffset(ImplName), ImplName); } @@ -516,7 +321,8 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { )"; OS << " 0,\n"; - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) OS << " " << LibCallImpl.getLibcallFuncName().size() << ",\n"; OS << "};\n\n"; @@ -525,7 +331,8 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { "ImplToLibcall[RTLIB::NumLibcallImpls] = {\n" " RTLIB::UNKNOWN_LIBCALL, // RTLIB::Unsupported\n"; - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) { const RuntimeLibcall *Provides = 
LibCallImpl.getProvides(); OS << " "; Provides->emitEnumEntry(OS); @@ -533,6 +340,7 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { LibCallImpl.emitEnumEntry(OS); OS << '\n'; } + OS << "};\n\n"; } @@ -576,7 +384,7 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( std::pair, const Record *>> Func2Preds; Sets.addExpander("LibcallImpls", std::make_unique( - *this, Func2Preds)); + Libcalls, Func2Preds)); const SetTheory::RecVec *Elements = Sets.expand(R->getValueAsDef("MemberList")); @@ -589,11 +397,12 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( constexpr unsigned BitsPerStorageElt = 64; DenseMap Pred2Funcs; - SmallVector BitsetValues( - divideCeil(RuntimeLibcallImplDefList.size() + 1, BitsPerStorageElt)); + SmallVector BitsetValues(divideCeil( + Libcalls.getRuntimeLibcallImplDefList().size() + 1, BitsPerStorageElt)); for (const Record *Elt : *Elements) { - const RuntimeLibcallImpl *LibCallImpl = getRuntimeLibcallImpl(Elt); + const RuntimeLibcallImpl *LibCallImpl = + Libcalls.getRuntimeLibcallImpl(Elt); if (!LibCallImpl) { PrintError(R, "entry for SystemLibrary is not a RuntimeLibcallImpl"); PrintNote(Elt->getLoc(), "invalid entry `" + Elt->getName() + "`"); @@ -761,39 +570,5 @@ void RuntimeLibcallEmitter::run(raw_ostream &OS) { } } -void LibcallPredicateExpander::expand(SetTheory &ST, const Record *Def, - SetTheory::RecSet &Elts) { - assert(Def->isSubClassOf("LibcallImpls")); - - SetTheory::RecSet TmpElts; - - ST.evaluate(Def->getValueInit("MemberList"), TmpElts, Def->getLoc()); - - Elts.insert(TmpElts.begin(), TmpElts.end()); - - AvailabilityPredicate AP(Def->getValueAsDef("AvailabilityPredicate")); - const Record *CCClass = Def->getValueAsOptionalDef("CallingConv"); - - // This is assuming we aren't conditionally applying a calling convention to - // some subsets, and not another, but this doesn't appear to be used. 
- - for (const Record *LibcallImplDef : TmpElts) { - const RuntimeLibcallImpl *LibcallImpl = - LibcallEmitter.getRuntimeLibcallImpl(LibcallImplDef); - if (!AP.isAlwaysAvailable() || CCClass) { - auto [It, Inserted] = Func2Preds.insert({LibcallImpl, {{}, CCClass}}); - if (!Inserted) { - PrintError( - Def, - "combining nested libcall set predicates currently unhandled: '" + - LibcallImpl->getLibcallFuncName() + "'"); - } - - It->second.first.push_back(AP.getDef()); - It->second.second = CCClass; - } - } -} - static TableGen::Emitter::OptClass X("gen-runtime-libcalls", "Generate RuntimeLibcalls"); From b0ae054a568622982e7e623c354709a7463b152a Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Wed, 5 Nov 2025 12:01:58 -0800 Subject: [PATCH 55/61] [BOLT][AArch64] Fix LDR relocation type in ADRP+LDR sequence (#166391) `R_AARCH64_ADD_ABS_LO12_NC` is for the `ADD` instruction in the `ADRP+ADD` sequence. For `ADRP+LDR` sequence generated in LDR relaxation, relocation type for `LDR` should be `R_AARCH64_LDST64_ABS_LO12_NC` if it is 64-bit integer load or `R_AARCH64_LDST32_ABS_LO12_NC` if 32-bit. Sorry should have included this in #165787. --- bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 57db6a436c5c6..3c77091d91ebd 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -640,7 +640,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { Insts[1].addOperand(MCOperand::createImm(0)); Insts[1].addOperand(MCOperand::createImm(0)); setOperandToSymbolRef(Insts[1], /* OpNum */ 2, Target, 0, Ctx, - ELF::R_AARCH64_ADD_ABS_LO12_NC); + isLDRXl(LDRInst) ? 
ELF::R_AARCH64_LDST64_ABS_LO12_NC + : ELF::R_AARCH64_LDST32_ABS_LO12_NC); return Insts; } From f76c132230326a296c4fb8f7cb6c0fb6b943fadb Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 5 Nov 2025 12:02:33 -0800 Subject: [PATCH 56/61] [SimplifyCFG] Fix weight calculation for `simplifySwitchOfPowersOfTwo` (#165956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continued from #165804 This maintains the BFI of the default branch. Originally it is `10/63`; post-pass, it ends up being `5/63 + 58/63 * 5/58` (the first term is from `PROF`; the second is the probability of going to the switch lookup times the probability, there, of taking the default branch). Issue #147390 --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 21 ++++++++++++------- .../X86/switch-of-powers-of-two.ll | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 9a8dbebe5bfba..37c048f421f1a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7731,19 +7731,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, // label. The other is those powers of 2 that don't appear in the case // statement. We don't know the distribution of the values coming in, so // the safest is to split 50-50 the original probability to `default`. 
- uint64_t OrigDenominator = sum_of(map_range( - Weights, [](const auto &V) { return static_cast(V); })); + uint64_t OrigDenominator = + sum_of(map_range(Weights, StaticCastTo)); SmallVector NewWeights(2); NewWeights[1] = Weights[0] / 2; NewWeights[0] = OrigDenominator - NewWeights[1]; setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false); - - // For the original switch, we reduce the weight of the default by the - // amount by which the previous branch contributes to getting to default, - // and then make sure the remaining weights have the same relative ratio - // wrt eachother. + // The probability of executing the default block stays constant. It was + // p_d = Weights[0] / OrigDenominator + // we rewrite as W/D + // We want to find the probability of the default branch of the switch + // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D) + // i.e. the original probability is the probability we go to the default + // branch from the BI branch, or we take the default branch on the SI. + // Meaning X = W / (2D - W), or (W/2) / (D - W/2) + // This matches using W/2 for the default branch probability numerator and + // D-W/2 as the denominator. + Weights[0] = NewWeights[1]; uint64_t CasesDenominator = OrigDenominator - Weights[0]; - Weights[0] /= 2; for (auto &W : drop_begin(Weights)) W = NewWeights[0] * static_cast(W) / CasesDenominator; diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll index d818335f075e5..e48c2b46a138a 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll @@ -141,5 +141,5 @@ return: ;. ; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 58, i32 5} -; CHECK: [[PROF2]] = !{!"branch_weights", i32 56, i32 5} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 53, i32 5} ;. 
From 78d649199b47370b72848c1ca8d9bd3323b050ac Mon Sep 17 00:00:00 2001 From: Ramkrishnan Date: Wed, 5 Nov 2025 15:06:30 -0500 Subject: [PATCH 57/61] [InterleavedAccess] Construct interleaved access store with shuffles Cost of interleaved store of 8 factor and 16 factor are cheaper in AArch64 with additional interleave instructions. --- llvm/include/llvm/CodeGen/TargetLowering.h | 5 + llvm/lib/CodeGen/InterleavedAccessPass.cpp | 13 +- .../Target/AArch64/AArch64ISelLowering.cpp | 131 +++++++++++++++++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7 + .../AArch64/AArch64TargetTransformInfo.cpp | 38 ++++- llvm/test/CodeGen/AArch64/vldn_shuffle.ll | 105 ++++++++++++++ .../AArch64/interleaved_store.ll | 117 ++++++++++++++++ .../AArch64/replicating-load-store-costs.ll | 2 +- .../PhaseOrdering/AArch64/interleave_vec.ll | 16 +-- 9 files changed, 416 insertions(+), 18 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 2550c2bee5f71..8aeaa9cdacfc1 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3233,6 +3233,11 @@ class LLVM_ABI TargetLoweringBase { /// Default to be the minimum interleave factor: 2. virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } + /// Return true if the target interleave with shuffles are cheaper + virtual bool isProfitableToInterleaveWithGatherScatter() const { + return false; + } + /// Lower an interleaved load to target specific intrinsics. Return /// true on success. /// diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 5c27a20869f81..45eca28ffb8a2 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -239,7 +239,8 @@ static bool isDeInterleaveMask(ArrayRef Mask, unsigned &Factor, /// I.e. <0, LaneLen, ... 
, LaneLen*(Factor - 1), 1, LaneLen + 1, ...> /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, - unsigned MaxFactor) { + unsigned MaxFactor, + bool InterleaveWithShuffles) { unsigned NumElts = SVI->getShuffleMask().size(); if (NumElts < 4) return false; @@ -250,6 +251,13 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, return true; } + if (InterleaveWithShuffles) { + for (unsigned i = 1; MaxFactor * i <= 16; i *= 2) { + Factor = i * MaxFactor; + if (SVI->isInterleave(Factor)) + return true; + } + } return false; } @@ -528,7 +536,8 @@ bool InterleavedAccessImpl::lowerInterleavedStore( cast(SVI->getType())->getNumElements(); // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - if (!isReInterleaveMask(SVI, Factor, MaxFactor)) + if (!isReInterleaveMask(SVI, Factor, MaxFactor, + TLI->isProfitableToInterleaveWithGatherScatter())) return false; assert(NumStoredElements % Factor == 0 && "number of stored element should be a multiple of Factor"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d08f9b94227a2..298746863d221 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -96,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -17989,11 +17990,17 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, unsigned Factor, const APInt &GapMask) const { - assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && - "Invalid interleave factor"); auto *SI = dyn_cast(Store); if (!SI) return false; + + if (isProfitableToInterleaveWithGatherScatter() && + Factor > getMaxSupportedInterleaveFactor()) + return lowerInterleavedStoreWithShuffle(SI, SVI, Factor); + + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + 
assert(!LaneMask && GapMask.popcount() == Factor && "Unexpected mask on store"); @@ -18139,6 +18146,126 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, return true; } +/// If the interleaved vector elements are greater than supported MaxFactor, +/// interleaving the data with additional shuffles can be used to +/// achieve the same. +/// +/// Consider the following data with 8 interleaves which are shuffled to store +/// stN instructions. Data needs to be stored in this order: +/// [v0, v1, v2, v3, v4, v5, v6, v7] +/// +/// v0 v4 v2 v6 v1 v5 v3 v7 +/// | | | | | | | | +/// \ / \ / \ / \ / +/// [zip v0,v4] [zip v2,v6] [zip v1,v5] [zip v3,v7] ==> stN = 4 +/// | | | | +/// \ / \ / +/// \ / \ / +/// \ / \ / +/// [zip [v0,v2,v4,v6]] [zip [v1,v3,v5,v7]] ==> stN = 2 +/// +/// For stN = 4, upper half of interleaved data V0, V1, V2, V3 is stored +/// with one st4 instruction. Lower half, i.e, V4, V5, V6, V7 is stored with +/// another st4. +/// +/// For stN = 2, upper half of interleaved data V0, V1 is stored +/// with one st2 instruction. Second set V2, V3 is stored with another st2. +/// Total of 4 st2's are required here. +bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle( + StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { + unsigned MaxSupportedFactor = getMaxSupportedInterleaveFactor(); + + auto *VecTy = cast(SVI->getType()); + assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store"); + + unsigned LaneLen = VecTy->getNumElements() / Factor; + Type *EltTy = VecTy->getElementType(); + auto *SubVecTy = FixedVectorType::get(EltTy, Factor); + + const DataLayout &DL = SI->getModule()->getDataLayout(); + bool UseScalable; + + // Skip if we do not have NEON and skip illegal vector types. We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. 
+ if (!Subtarget->hasNEON() || + !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable)) + return false; + + if (UseScalable) + return false; + + std::deque Shuffles; + Shuffles.push_back(SVI); + unsigned ConcatLevel = Factor; + // Getting all the interleaved operands. + while (ConcatLevel > 1) { + unsigned InterleavedOperands = Shuffles.size(); + for (unsigned i = 0; i < InterleavedOperands; i++) { + ShuffleVectorInst *SFL = dyn_cast(Shuffles.front()); + if (!SFL) + return false; + Shuffles.pop_front(); + + Value *Op0 = SFL->getOperand(0); + Value *Op1 = SFL->getOperand(1); + + Shuffles.push_back(dyn_cast(Op0)); + Shuffles.push_back(dyn_cast(Op1)); + } + ConcatLevel >>= 1; + } + + IRBuilder<> Builder(SI); + auto Mask = createInterleaveMask(LaneLen, 2); + SmallVector UpperHalfMask(LaneLen), LowerHalfMask(LaneLen); + for (unsigned i = 0; i < LaneLen; i++) { + LowerHalfMask[i] = Mask[i]; + UpperHalfMask[i] = Mask[i + LaneLen]; + } + + unsigned InterleaveFactor = Factor >> 1; + while (InterleaveFactor >= MaxSupportedFactor) { + std::deque ShufflesIntermediate; + ShufflesIntermediate.resize(Factor); + for (unsigned j = 0; j < Factor; j += (InterleaveFactor * 2)) { + for (unsigned i = 0; i < InterleaveFactor; i++) { + auto *Shuffle = Builder.CreateShuffleVector( + Shuffles[i + j], Shuffles[i + j + InterleaveFactor], LowerHalfMask); + ShufflesIntermediate[i + j] = Shuffle; + Shuffle = Builder.CreateShuffleVector( + Shuffles[i + j], Shuffles[i + j + InterleaveFactor], UpperHalfMask); + ShufflesIntermediate[i + j + InterleaveFactor] = Shuffle; + } + } + Shuffles = ShufflesIntermediate; + InterleaveFactor >>= 1; + } + + Type *PtrTy = SI->getPointerOperandType(); + auto *STVTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen); + + Value *BaseAddr = SI->getPointerOperand(); + Function *StNFunc = getStructuredStoreFunction( + SI->getModule(), MaxSupportedFactor, UseScalable, STVTy, PtrTy); + for (unsigned i = 0; i < (Factor / MaxSupportedFactor); i++) { + 
SmallVector Ops; + for (unsigned j = 0; j < MaxSupportedFactor; j++) + Ops.push_back(Shuffles[i * MaxSupportedFactor + j]); + + if (i > 0) { + // We will compute the pointer operand of each store from the original + // base address using GEPs. Cast the base address to a pointer to the + // scalar element type. + BaseAddr = Builder.CreateConstGEP1_32( + SubVecTy->getElementType(), BaseAddr, LaneLen * MaxSupportedFactor); + } + Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy)); + Builder.CreateCall(StNFunc, Ops); + } + return true; +} + bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( Instruction *Load, Value *Mask, IntrinsicInst *DI) const { const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 70bfae717fb76..bfd8474bfeec9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -229,6 +229,10 @@ class AArch64TargetLowering : public TargetLowering { bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override; + bool isProfitableToInterleaveWithGatherScatter() const override { + return true; + } + unsigned getMaxSupportedInterleaveFactor() const override { return 4; } bool lowerInterleavedLoad(Instruction *Load, Value *Mask, @@ -239,6 +243,9 @@ class AArch64TargetLowering : public TargetLowering { ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override; + bool lowerInterleavedStoreWithShuffle(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const; + bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 197aae6e03cb1..8729ed3890131 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4922,11 +4922,36 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps)) return InstructionCost::getInvalid(); - if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { + unsigned NumLoadStores = 1; + InstructionCost ShuffleCost = 0; + bool isInterleaveWithShuffle = false; + unsigned MaxSupportedFactor = TLI->getMaxSupportedInterleaveFactor(); + + auto *SubVecTy = + VectorType::get(VecVTy->getElementType(), + VecVTy->getElementCount().divideCoefficientBy(Factor)); + + if (TLI->isProfitableToInterleaveWithGatherScatter() && + Opcode == Instruction::Store && (0 == Factor % MaxSupportedFactor) && + Factor > MaxSupportedFactor) { + isInterleaveWithShuffle = true; + SmallVector Mask; + // preparing interleave Mask. + for (unsigned i = 0; i < VecVTy->getElementCount().getKnownMinValue() / 2; + i++) { + for (unsigned j = 0; j < 2; j++) + Mask.push_back(j * Factor + i); + } + + NumLoadStores = Factor / MaxSupportedFactor; + ShuffleCost = + (Factor * getShuffleCost(TargetTransformInfo::SK_Splice, VecVTy, VecVTy, + Mask, CostKind, 0, SubVecTy)); + } + + if (!UseMaskForGaps && + (Factor <= MaxSupportedFactor || isInterleaveWithShuffle)) { unsigned MinElts = VecVTy->getElementCount().getKnownMinValue(); - auto *SubVecTy = - VectorType::get(VecVTy->getElementType(), - VecVTy->getElementCount().divideCoefficientBy(Factor)); // ldN/stN only support legal vector types of size 64 or 128 in bits. 
// Accesses having vector types that are a multiple of 128 bits can be @@ -4934,7 +4959,10 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( bool UseScalable; if (MinElts % Factor == 0 && TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable)) - return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable); + return (Factor * + TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable) * + NumLoadStores) + + ShuffleCost; } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index 3685e9cf85bd6..b2635d3d9f1a5 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -730,6 +730,111 @@ entry: ret void } +define void @store_factor8(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, + <4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7) { +; CHECK-LABEL: store_factor8: +; CHECK: .Lfunc_begin17: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK: zip1 [[V1:.*s]], [[I1:.*s]], [[I5:.*s]] +; CHECK-NEXT: zip2 [[V5:.*s]], [[I1]], [[I5]] +; CHECK-NEXT: zip1 [[V2:.*s]], [[I2:.*s]], [[I6:.*s]] +; CHECK-NEXT: zip2 [[V6:.*s]], [[I2]], [[I6]] +; CHECK-NEXT: zip1 [[V3:.*s]], [[I3:.*s]], [[I7:.*s]] +; CHECK-NEXT: zip2 [[V7:.*s]], [[I3]], [[I7]] +; CHECK-NEXT: zip1 [[V4:.*s]], [[I4:.*s]], [[I8:.*s]] +; CHECK-NEXT: zip2 [[V8:.*s]], [[I4]], [[I8]] +; CHECK-NEXT: st4 { [[V1]], [[V2]], [[V3]], [[V4]] }, [x0], #64 +; CHECK-NEXT: st4 { [[V5]], [[V6]], [[V7]], [[V8]] }, [x0] +; CHECK-NEXT: ret + + %v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> + %v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> + %v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> + %v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> + + %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> + %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 
x i32> + + %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> + store <32 x i32> %interleaved.vec, ptr %ptr, align 4 + ret void +} + +define void @store_factor16(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, + <4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7, + <4 x i32> %a8, <4 x i32> %a9, <4 x i32> %a10, <4 x i32> %a11, + <4 x i32> %a12, <4 x i32> %a13, <4 x i32> %a14, <4 x i32> %a15) { +; CHECK-LABEL: store_factor16: +; CHECK: .Lfunc_begin18: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK: zip1 [[V05:.*s]], [[I05:.*s]], [[I13:.*s]] +; CHECK-NEXT: zip1 [[V01:.*s]], [[I01:.*s]], [[I09:.*s]] +; CHECK-NEXT: zip1 [[V02:.*s]], [[I02:.*s]], [[I10:.*s]] +; CHECK-NEXT: zip1 [[V06:.*s]], [[I06:.*s]], [[I14:.*s]] +; CHECK-NEXT: zip1 [[V07:.*s]], [[I07:.*s]], [[I15:.*s]] +; CHECK-NEXT: zip2 [[V09:.*s]], [[I01]], [[I09]] +; CHECK-NEXT: zip2 [[V13:.*s]], [[I05]], [[I13]] +; CHECK-NEXT: zip1 [[V03:.*s]], [[I03:.*s]], [[I11:.*s]] +; CHECK-NEXT: zip1 [[V04:.*s]], [[I04:.*s]], [[I12:.*s]] +; CHECK-NEXT: zip1 [[V08:.*s]], [[I08:.*s]], [[I16:.*s]] +; CHECK-NEXT: zip2 [[V10:.*s]], [[I02]], [[I10]] +; CHECK-NEXT: zip2 [[V14:.*s]], [[I06]], [[I14]] +; CHECK-NEXT: zip2 [[V11:.*s]], [[I03]], [[I11]] +; CHECK-NEXT: zip1 [[V17:.*s]], [[V01]], [[V05]] +; CHECK-NEXT: zip2 [[V15:.*s]], [[I07]], [[I15]] +; CHECK-NEXT: zip2 [[V21:.*s]], [[V01]], [[V05]] +; CHECK-NEXT: zip1 [[V18:.*s]], [[V02]], [[V06]] +; CHECK-NEXT: zip2 [[V12:.*s]], [[I04]], [[I12]] +; CHECK-NEXT: zip2 [[V16:.*s]], [[I08]], [[I16]] +; CHECK-NEXT: zip1 [[V19:.*s]], [[V03]], [[V07]] +; CHECK-NEXT: zip2 [[V22:.*s]], [[V02]], [[V06]] +; CHECK-NEXT: zip1 [[V25:.*s]], [[V09]], [[V13]] +; CHECK-NEXT: zip1 [[V20:.*s]], [[V04]], [[V08]] +; CHECK-NEXT: zip2 [[V23:.*s]], [[V03]], [[V07]] +; CHECK-NEXT: zip1 [[V26:.*s]], [[V10]], [[V14]] +; CHECK-NEXT: zip2 [[V29:.*s]], [[V09]], [[V13]] +; CHECK-NEXT: zip2 [[V24:.*s]], [[V04]], [[V08]] +; CHECK-NEXT: zip1 
[[V27:.*s]], [[V11]], [[V15]] +; CHECK-NEXT: zip2 [[V30:.*s]], [[V10]], [[V14]] +; CHECK-NEXT: zip1 [[V28:.*s]], [[V12]], [[V16]] +; CHECK-NEXT: zip2 [[V31:.*s]], [[V11]], [[V15]] +; CHECK-NEXT: zip2 [[V32:.*s]], [[V12]], [[V16]] +; CHECK-NEXT: st4 { [[V17]], [[V18]], [[V19]], [[V20]] }, [x8], #64 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: st4 { [[V21]], [[V22]], [[V23]], [[V24]] }, [x8] +; CHECK-NEXT: add x8, x0, #128 +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: st4 { [[V25]], [[V26]], [[V27]], [[V28]] }, [x8] +; CHECK-NEXT: add x8, x0, #192 +; CHECK-NEXT: st4 { [[V29]], [[V30]], [[V31]], [[V32]] }, [x8] +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret + + %v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> + %v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> + %v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> + %v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> + %v4 = shufflevector <4 x i32> %a8, <4 x i32> %a9, <8 x i32> + %v5 = shufflevector <4 x i32> %a10, <4 x i32> %a11, <8 x i32> + %v6 = shufflevector <4 x i32> %a12, <4 x i32> %a13, <8 x i32> + %v7 = shufflevector <4 x i32> %a14, <4 x i32> %a15, <8 x i32> + + %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> + %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> + %s2 = shufflevector <8 x i32> %v4, <8 x i32> %v5, <16 x i32> + %s3 = shufflevector <8 x i32> %v6, <8 x i32> %v7, <16 x i32> + + %d0 = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> + %d1 = shufflevector <16 x i32> %s2, <16 x i32> %s3, <32 x i32> + + %interleaved.vec = shufflevector <32 x i32> %d0, <32 x i32> %d1, <64 x i32> + store <64 x i32> %interleaved.vec, ptr %ptr, align 4 + ret void +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll new file mode 100644 index 0000000000000..bd5f4e2a3279b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses=true -max-interleave-group-factor=16 -S < %s | FileCheck %s + +define dso_local void @_Z6unpackPhS_(ptr noalias noundef readonly captures(none) %in, ptr noalias noundef writeonly captures(none) %out) { +; CHECK-LABEL: define dso_local void @_Z6unpackPhS_( +; CHECK-SAME: ptr noalias noundef readonly captures(none) [[IN:%.*]], ptr noalias noundef writeonly captures(none) [[OUT:%.*]]) { +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[IN]], i64 [[OFFSET_IDX2]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP3]], align 1, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC5]] +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[STRIDED_VEC5]], 
[[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i8> [[STRIDED_VEC4]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> zeroinitializer, <4 x i8> [[STRIDED_VEC6]], <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC5]], <4 x i8> [[TMP0]], <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC4]], <4 x i8> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC]], <4 x i8> [[TMP4]], <8 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> [[TMP8]], <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP10]], <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> [[TMP14]], <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP15]], <8 x i8> [[TMP16]], <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> [[TMP18]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i8> [[TMP19]], <16 x i8> [[TMP20]], <32 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP21]], <16 x i8> [[TMP22]], <32 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = 
shufflevector <32 x i8> [[TMP23]], <32 x i8> [[TMP24]], <64 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <64 x i8> [[TMP25]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: store <64 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %vector.body, !llvm.loop [[LOOP5:![0-9]+]] +; +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %entry, %for.body + %i.033 = phi i32 [ 0, %entry ], [ %inc17, %for.body ] + %out.addr.032 = phi ptr [ %out, %entry ], [ %add.ptr, %for.body ] + %in.addr.031 = phi ptr [ %in, %entry ], [ %add.ptr15, %for.body ] + store i8 0, ptr %out.addr.032, align 1 + %arrayidx10 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 3 + %0 = load i8, ptr %arrayidx10, align 1 + %arrayidx14 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 1 + store i8 %0, ptr %arrayidx14, align 1 + %arrayidx10.1 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 2 + %1 = load i8, ptr %arrayidx10.1, align 1 + %arrayidx14.1 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 2 + store i8 %1, ptr %arrayidx14.1, align 1 + %add.2 = add i8 %0, %1 + %arrayidx14.2 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 3 + store i8 %add.2, ptr %arrayidx14.2, align 1 + %arrayidx10.3 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 1 + %2 = load i8, ptr %arrayidx10.3, align 1 + %arrayidx14.3 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 4 + store i8 %2, ptr %arrayidx14.3, align 1 + %add.4 = add i8 %0, %2 + %arrayidx14.4 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 5 + store i8 %add.4, ptr %arrayidx14.4, align 1 + %add.5 = add i8 %1, %2 + %arrayidx14.5 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 6 + store i8 %add.5, ptr 
%arrayidx14.5, align 1 + %add.6 = add i8 %0, %add.5 + %arrayidx14.6 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 7 + store i8 %add.6, ptr %arrayidx14.6, align 1 + %3 = load i8, ptr %in.addr.031, align 1 + %arrayidx14.7 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 8 + store i8 %3, ptr %arrayidx14.7, align 1 + %add.8 = add i8 %0, %3 + %arrayidx14.8 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 9 + store i8 %add.8, ptr %arrayidx14.8, align 1 + %add.9 = add i8 %1, %3 + %arrayidx14.9 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 10 + store i8 %add.9, ptr %arrayidx14.9, align 1 + %add.10 = add i8 %0, %add.9 + %arrayidx14.10 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 11 + store i8 %add.10, ptr %arrayidx14.10, align 1 + %add.11 = add i8 %2, %3 + %arrayidx14.11 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 12 + store i8 %add.11, ptr %arrayidx14.11, align 1 + %add.12 = add i8 %0, %add.11 + %arrayidx14.12 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 13 + store i8 %add.12, ptr %arrayidx14.12, align 1 + %add.13 = add i8 %1, %add.11 + %arrayidx14.13 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 14 + store i8 %add.13, ptr %arrayidx14.13, align 1 + %add.14 = add i8 %0, %add.13 + %arrayidx14.14 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 15 + store i8 %add.14, ptr %arrayidx14.14, align 1 + %add.ptr = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 16 + %add.ptr15 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 4 + %inc17 = add nuw nsw i32 %i.033, 1 + %exitcond.not = icmp eq i32 %inc17, 32 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 68cfc659e1e94..cdddcc9fc4226 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 -; RUN: opt -p loop-vectorize -S %s | FileCheck %s +; RUN: opt -p loop-vectorize -max-interleave-group-factor=4 -S %s | FileCheck %s target triple = "arm64-apple-macosx15.0.0" diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll index f2ae327778f4a..54b7f2afe1ed0 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll @@ -925,20 +925,20 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef ; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: 
[[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]] -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]] -; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <32 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <32 x float> [[WIDE_VEC]], [[TMP1]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <32 x float> [[WIDE_VEC19]], [[TMP4]] +; CHECK-NEXT: store <32 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144 ; CHECK-NEXT: br i1 [[TMP25]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[FOR_END11]]: From 1fc5c02aa56ad4cef1391863dfc0922ef7110569 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 5 Nov 2025 12:13:48 -0800 Subject: [PATCH 58/61] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (#164507) We don't have sufficient information to know when the versioned (or unversioned) loop variant will be taken, so we mark the branch as having "unknown" probabilities. 
Issue #147390 --- llvm/lib/Transforms/Utils/LoopVersioning.cpp | 9 +++++++-- .../Transforms/LoopDistribute/basic-with-memchecks.ll | 5 +++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index ec2e6c1ab796b..9c8b6ef83e56d 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -109,8 +110,12 @@ void LoopVersioning::versionLoop( // Insert the conditional branch based on the result of the memchecks. Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); Builder.SetInsertPoint(OrigTerm); - Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(), - VersionedLoop->getLoopPreheader()); + auto *BI = + Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader()); + // We don't know what the probability of executing the versioned vs the + // unversioned variants is. + setExplicitlyUnknownBranchWeightsIfProfiled(*BI, DEBUG_TYPE); OrigTerm->eraseFromParent(); // The loops merge in the original exit block. 
This is now dominated by the diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll index 97ea2c6708dad..2828882afe779 100644 --- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll +++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll @@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0" @E = common global ptr null, align 8 ; CHECK-LABEL: @f( -define void @f() { +define void @f() !prof !{!"function_entry_count", i32 10} { entry: %a = load ptr, ptr @A, align 8 %b = load ptr, ptr @B, align 8 @@ -55,7 +55,7 @@ entry: ; CHECK: = icmp ; CHECK-NOT: = icmp -; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label %for.body.ph.ldist1 +; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label %for.body.ph.ldist1, !prof ![[PROF1:[0-9]]] ; The non-distributed loop that the memchecks fall back on. @@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent } !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.distribute.enable", i1 true} +; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"} From 163933e9e7099f352ff8df1973f9a9c3d7def6c5 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 20:17:29 +0000 Subject: [PATCH 59/61] [gn build] Port 0469ff0a212d --- llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn index 43916cef756ff..918132b38b6ed 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn @@ -10,6 +10,7 @@ source_set("Basic") { "DirectiveEmitter.cpp", "IntrinsicEmitter.cpp", "RISCVTargetDefEmitter.cpp", + "RuntimeLibcalls.cpp", "RuntimeLibcallsEmitter.cpp", "SDNodeProperties.cpp", "TableGen.cpp", From 5f1b9023a8093fd8beb931a74d28753fbda88fdf Mon Sep 17 00:00:00 2001 From: 
Maksim Panchenko Date: Wed, 5 Nov 2025 12:36:57 -0800 Subject: [PATCH 60/61] [BOLT][AArch64] Fix printing of relocation types (#166621) Enumeration of relocation types is not always sequential, e.g. on AArch64 the first real relocation type is 0x101. As such, the existing code in `Relocation::print()` was crashing while printing AArch64 relocations. Fix it by using `llvm::object::getELFRelocationTypeName()`. --- bolt/lib/Core/Relocation.cpp | 38 ++++------------------- bolt/test/AArch64/relocation-type-print.s | 24 ++++++++++++++ 2 files changed, 30 insertions(+), 32 deletions(-) create mode 100644 bolt/test/AArch64/relocation-type-print.s diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index 4b827b647b06c..f872db2cae0ce 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -1018,41 +1018,15 @@ void Relocation::print(raw_ostream &OS) const { default: OS << "RType:" << Twine::utohexstr(Type); break; - - case Triple::aarch64: { - static const char *const AArch64RelocNames[] = { -#define ELF_RELOC(name, value) #name, -#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" -#undef ELF_RELOC - }; - assert(Type < ArrayRef(AArch64RelocNames).size()); - OS << AArch64RelocNames[Type]; - } break; - + case Triple::aarch64: + OS << object::getELFRelocationTypeName(ELF::EM_AARCH64, Type); + break; case Triple::riscv64: - // RISC-V relocations are not sequentially numbered so we cannot use an - // array - switch (Type) { - default: - llvm_unreachable("illegal RISC-V relocation"); -#define ELF_RELOC(name, value) \ - case value: \ - OS << #name; \ + OS << object::getELFRelocationTypeName(ELF::EM_RISCV, Type); break; -#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" -#undef ELF_RELOC - } + case Triple::x86_64: + OS << object::getELFRelocationTypeName(ELF::EM_X86_64, Type); break; - - case Triple::x86_64: { - static const char *const X86RelocNames[] = { -#define ELF_RELOC(name, value) #name, -#include 
"llvm/BinaryFormat/ELFRelocs/x86_64.def" -#undef ELF_RELOC - }; - assert(Type < ArrayRef(X86RelocNames).size()); - OS << X86RelocNames[Type]; - } break; } OS << ", 0x" << Twine::utohexstr(Offset); if (Symbol) { diff --git a/bolt/test/AArch64/relocation-type-print.s b/bolt/test/AArch64/relocation-type-print.s new file mode 100644 index 0000000000000..111cbbb94bc54 --- /dev/null +++ b/bolt/test/AArch64/relocation-type-print.s @@ -0,0 +1,24 @@ +## Verify that llvm-bolt correctly prints relocation types. + +# REQUIRES: system-linux + +# RUN: %clang %cflags -nostartfiles %s -o %t.exe -Wl,-q,--no-relax +# RUN: llvm-bolt %t.exe --print-cfg --print-relocations -o %t.bolt \ +# RUN: | FileCheck %s + + .section .text + .align 4 + .globl _start + .type _start, %function +_start: + + adrp x0, _start +# CHECK: adrp +# CHECK-SAME: R_AARCH64_ADR_PREL_PG_HI21 + + add x0, x0, :lo12:_start +# CHECK-NEXT: add +# CHECK-SAME: R_AARCH64_ADD_ABS_LO12_NC + + ret + .size _start, .-_start From 54190970cf275fd1d8a99b7c84a6a106fd543c3d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 5 Nov 2025 20:57:51 +0000 Subject: [PATCH 61/61] [LV] Add tests for narrowing interleave groups with casts. Add additional tests for narrowing interleave groups with casts. 
--- ...to-widen-memory-with-wide-ops-and-casts.ll | 694 ++++++++++++++++++ 1 file changed, 694 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll new file mode 100644 index 0000000000000..bba7d058d6637 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll @@ -0,0 +1,694 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF4 %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx" + +define void @test_2xi64_matching_zext_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_matching_zext_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_matching_zext_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP2]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br 
label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = zext i32 %l.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = zext i32 %l.0 to i64 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_matching_sext_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_matching_sext_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br 
label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_matching_sext_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP2]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = sext i32 %l.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = sext i32 %l.0 to i64 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, 
label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mismatching_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_mismatching_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP4]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_mismatching_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; 
VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = zext i32 %l.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = sext i32 %l.0 to i64 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_matching_cast_add_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_matching_cast_add_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; 
VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_matching_cast_add_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x 
i64> [[TMP3]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = zext i32 %l.0 to i64 + %add.0 = add i64 %ext.0, 2 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %add.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = zext i32 %l.0 to i64 + %add.1 = add i64 %ext.1, 2 + store i64 %add.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mismatching_cast_add_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_mismatching_cast_add_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; 
VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_mismatching_cast_add_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP5]], splat (i64 2) +; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x 
i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = sext i32 %l.0 to i64 + %add.0 = add i64 %ext.0, 2 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %add.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = zext i32 %l.0 to i64 + %add.1 = add i64 %ext.1, 2 + store i64 %add.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_add_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_add_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr 
[[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_add_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP3]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; 
VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = add i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = add i32 %l.0, 2 + %ext.1 = zext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mismatching_add_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_mismatching_add_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = sub <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64> +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> 
[[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_mismatching_add_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, 
%loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = add i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = sub i32 %l.0, 2 + %ext.1 = zext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_add_mismatching_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_add_mismatching_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP5]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br 
i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_add_mismatching_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP5]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = add i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr 
%dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = add i32 %l.0, 2 + %ext.1 = sext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_sub_mismatching_ops_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_sub_mismatching_ops_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = sub <2 x i32> splat (i32 2), [[WIDE_LOAD]] +; VF2-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64> +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: 
[[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_sub_mismatching_ops_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i32> splat (i32 2), [[WIDE_LOAD]] +; VF4-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = sub i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + 
%add.1 = sub i32 2, %l.0 + %ext.1 = zext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +}