From e0bc38226c903937faaef9766b76bda9e8f0cb5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 21 Oct 2025 08:42:55 +0200 Subject: [PATCH 01/99] [flang] Fix standalone build regression from #161179 (#164309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix incorrect linking and dependencies introduced in #161179 that break standalone builds of Flang. Signed-off-by: Michał Górny --- flang/lib/Optimizer/Dialect/MIF/CMakeLists.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/flang/lib/Optimizer/Dialect/MIF/CMakeLists.txt b/flang/lib/Optimizer/Dialect/MIF/CMakeLists.txt index d52ab097ddbf4..ed8463e9b0330 100644 --- a/flang/lib/Optimizer/Dialect/MIF/CMakeLists.txt +++ b/flang/lib/Optimizer/Dialect/MIF/CMakeLists.txt @@ -3,18 +3,22 @@ add_flang_library(MIFDialect MIFOps.cpp DEPENDS - MLIRIR MIFOpsIncGen LINK_LIBS FIRDialect FIRDialectSupport FIRSupport - MLIRIR - MLIRTargetLLVMIRExport LINK_COMPONENTS AsmParser AsmPrinter Remarks + + MLIR_DEPS + MLIRIR + + MLIR_LIBS + MLIRIR + MLIRTargetLLVMIRExport ) From e4f3e9a3d1a3d78675fb3daa16cb6e97405f6627 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 20 Oct 2025 23:48:50 -0700 Subject: [PATCH 02/99] [AMDGPU] Remove magic constants from V_PK_ADD_F32 pattern. 
NFC (#164335) --- llvm/lib/Target/AMDGPU/SIInstructions.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 74d41532170a6..6f1feb1dc2996 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2223,8 +2223,8 @@ def : GCNPat < def : GCNPat < (DivergentUnaryFrag (v2f32 VReg_64:$src)), - (V_PK_ADD_F32 11 /* OP_SEL_1 | NEG_LO | HEG_HI */, VReg_64:$src, - 11 /* OP_SEL_1 | NEG_LO | HEG_HI */, (i64 0), + (V_PK_ADD_F32 !or(SRCMODS.OP_SEL_1, SRCMODS.NEG, SRCMODS.NEG_HI), VReg_64:$src, + !or(SRCMODS.OP_SEL_1, SRCMODS.NEG, SRCMODS.NEG_HI), (i64 0), 0, 0, 0, 0, 0) > { let SubtargetPredicate = HasPackedFP32Ops; From 07d47c792b980746ab1ff5ea3f346c87b024bd51 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Tue, 21 Oct 2025 09:23:46 +0200 Subject: [PATCH 03/99] [AMDGPU] Update code sequence for CU-mode Release Fences in GFX10+ (#161638) They were previously optimized to not emit any waitcnt, which is technically correct because there is no reordering of operations at workgroup scope in CU mode for GFX10+. This breaks transitivity however, for example if we have the following sequence of events in one thread: - some stores - store atomic release syncscope("workgroup") - barrier then another thread follows with - barrier - load atomic acquire - store atomic release syncscope("agent") It does not work because, while the other thread sees the stores, it cannot release them at the wider scope. Our release fences aren't strong enough to "wait" on stores from other waves. We also cannot strengthen our release fences any further to allow for releasing other wave's stores because only GFX12 can do that with `global_wb`. GFX10-11 do not have the writeback instruction. It'd also add yet another level of complexity to code sequences, with both acquire/release having CU-mode only alternatives. 
Lastly, acq/rel are always used together. The price for synchronization has to be paid either at the acq, or the rel. Strengthening the releases would just make the memory model more complex but wouldn't help performance. So the choice here is to streamline the code sequences by making CU and WGP mode emit almost identical (vL0 inv is not needed in CU mode) code for release (or stronger) atomic ordering. This also removes the `vm_vsrc(0)` wait before barriers. Now that the release fence in CU mode is strong enough, it is no longer needed. Supersedes #160501 Solves SC1-6454 --- llvm/docs/AMDGPUUsage.rst | 61 +- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 51 +- .../memory-legalizer-atomic-fence.ll | 30 +- .../AMDGPU/lds-dma-workgroup-release.ll | 1 - .../AMDGPU/memory-legalizer-barriers.ll | 20 +- .../memory-legalizer-fence-mmra-global.ll | 48 ++ .../CodeGen/AMDGPU/memory-legalizer-fence.ll | 57 +- .../AMDGPU/memory-legalizer-flat-volatile.ll | 11 +- .../AMDGPU/memory-legalizer-flat-workgroup.ll | 786 +++++++++++++----- .../memory-legalizer-global-volatile.ll | 11 +- .../memory-legalizer-global-workgroup.ll | 632 ++++++++++++-- .../AMDGPU/memory-legalizer-local-agent.ll | 330 ++++++-- .../AMDGPU/memory-legalizer-local-cluster.ll | 330 ++++++-- .../AMDGPU/memory-legalizer-local-system.ll | 330 ++++++-- .../AMDGPU/memory-legalizer-local-volatile.ll | 11 +- .../memory-legalizer-local-workgroup.ll | 330 ++++++-- 16 files changed, 2270 insertions(+), 769 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index f971bc0756544..7780c0a6dca0a 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -13239,9 +13239,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) & - generic vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - If OpenCL, omit lgkmcnt(0). 
- Could be split into @@ -13287,8 +13284,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. 2. buffer/global/flat_store store atomic release - workgroup - local 1. s_waitcnt vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit. - If OpenCL, omit. - Could be split into separate s_waitcnt @@ -13376,9 +13371,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) & - generic vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - If OpenCL, omit lgkmcnt(0). - Could be split into separate s_waitcnt @@ -13423,8 +13415,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. 2. buffer/global/flat_atomic atomicrmw release - workgroup - local 1. s_waitcnt vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit. - If OpenCL, omit. - Could be split into separate s_waitcnt @@ -13508,9 +13498,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. fence release - workgroup *none* 1. s_waitcnt lgkmcnt(0) & vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - If OpenCL and address space is not generic, omit @@ -13637,9 +13624,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) & vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - If OpenCL, omit lgkmcnt(0). - Must happen after @@ -13691,8 +13675,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. 2. buffer/global_atomic 3. s_waitcnt vm/vscnt(0) - - If CU wavefront execution - mode, omit. - Use vmcnt(0) if atomic with return and vscnt(0) if atomic with no-return. @@ -13717,8 +13699,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. atomicrmw acq_rel - workgroup - local 1. 
s_waitcnt vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit. - If OpenCL, omit. - Could be split into separate s_waitcnt @@ -13778,9 +13758,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) & vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - If OpenCL, omit lgkmcnt(0). - Could be split into separate s_waitcnt @@ -13826,9 +13803,9 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. 3. s_waitcnt lgkmcnt(0) & vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). + - If atomic with return, omit + vscnt(0), if atomic with + no-return, omit vmcnt(0). - If OpenCL, omit lgkmcnt(0). - Must happen before the following @@ -14001,9 +13978,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. fence acq_rel - workgroup *none* 1. s_waitcnt lgkmcnt(0) & vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - If OpenCL and address space is not generic, omit @@ -14233,9 +14207,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. load atomic seq_cst - workgroup - global 1. s_waitcnt lgkmcnt(0) & - generic vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit vmcnt(0) and - vscnt(0). - Could be split into separate s_waitcnt vmcnt(0), s_waitcnt @@ -14344,8 +14315,6 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. 1. s_waitcnt vmcnt(0) & vscnt(0) - - If CU wavefront execution - mode, omit. - Could be split into separate s_waitcnt vmcnt(0) and s_waitcnt @@ -15347,8 +15316,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit ``s_wait_dscnt 0x0``. 
- The waits can be @@ -15394,8 +15361,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit. - The waits can be @@ -15489,8 +15454,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit ``s_wait_dscnt 0x0``. - If OpenCL and CU wavefront @@ -15540,8 +15503,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit all. - The waits can be @@ -15633,8 +15594,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit ``s_wait_dscnt 0x0``. - If OpenCL and @@ -15764,8 +15723,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit ``s_wait_dscnt 0x0``. - Must happen after @@ -15822,8 +15779,6 @@ the instruction in the code sequence that references the table. | **Atomic without return:** | ``s_wait_storecnt 0x0`` - - If CU wavefront execution - mode, omit. - Must happen before the following ``global_inv``. @@ -15848,8 +15803,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit. - The waits can be @@ -15911,8 +15864,6 @@ the instruction in the code sequence that references the table. 
| ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit ``s_wait_loadcnt 0x0``. - The waits can be @@ -16164,8 +16115,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL and address space is @@ -16394,8 +16343,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit ``s_wait_dscnt 0x0`` @@ -16502,8 +16449,6 @@ the instruction in the code sequence that references the table. | ``s_wait_storecnt 0x0`` | ``s_wait_loadcnt 0x0`` | ``s_wait_dscnt 0x0`` - | **CU wavefront execution mode:** - | ``s_wait_dscnt 0x0`` - If OpenCL, omit all. - The waits can be diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index bdbc000524ce4..07264d973648f 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -397,12 +397,6 @@ class SICacheControl { bool IsCrossAddrSpaceOrdering, Position Pos) const = 0; - /// Inserts any necessary instructions before the barrier start instruction - /// \p MI in order to support pairing of barriers and fences. - virtual bool insertBarrierStart(MachineBasicBlock::iterator &MI) const { - return false; - }; - /// Virtual destructor to allow derivations to be deleted. 
virtual ~SICacheControl() = default; }; @@ -583,12 +577,8 @@ class SIGfx10CacheControl : public SIGfx7CacheControl { bool IsCrossAddrSpaceOrdering, Position Pos, AtomicOrdering Order, bool AtomicsOnly) const override; - bool insertAcquire(MachineBasicBlock::iterator &MI, - SIAtomicScope Scope, - SIAtomicAddrSpace AddrSpace, - Position Pos) const override; - - bool insertBarrierStart(MachineBasicBlock::iterator &MI) const override; + bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, Position Pos) const override; }; class SIGfx11CacheControl : public SIGfx10CacheControl { @@ -2069,8 +2059,11 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI, // the WGP. Therefore need to wait for operations to complete to ensure // they are visible to waves in the other CU as the L0 is per CU. // Otherwise in CU mode and all waves of a work-group are on the same CU - // which shares the same L0. - if (!ST.isCuModeEnabled()) { + // which shares the same L0. Note that we still need to wait when + // performing a release in this mode to respect the transitivity of + // happens-before, e.g. other waves of the workgroup must be able to + // release the memory from another wave at a wider scope. + if (!ST.isCuModeEnabled() || isReleaseOrStronger(Order)) { if ((Op & SIMemOp::LOAD) != SIMemOp::NONE) VMCnt |= true; if ((Op & SIMemOp::STORE) != SIMemOp::NONE) @@ -2225,22 +2218,6 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, return Changed; } -bool SIGfx10CacheControl::insertBarrierStart( - MachineBasicBlock::iterator &MI) const { - // We need to wait on vm_vsrc so barriers can pair with fences in GFX10+ CU - // mode. This is because a CU mode release fence does not emit any wait, which - // is fine when only dealing with vmem, but isn't sufficient in the presence - // of barriers which do not go through vmem. - // GFX12.5 does not require this additional wait. 
- if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) - return false; - - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); - return true; -} - bool SIGfx11CacheControl::enableLoadCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { @@ -2419,15 +2396,20 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI, // In WGP mode the waves of a work-group can be executing on either CU // of the WGP. Therefore need to wait for operations to complete to // ensure they are visible to waves in the other CU as the L0 is per CU. + // // Otherwise in CU mode and all waves of a work-group are on the same CU - // which shares the same L0. + // which shares the same L0. Note that we still need to wait when + // performing a release in this mode to respect the transitivity of + // happens-before, e.g. other waves of the workgroup must be able to + // release the memory from another wave at a wider scope. // // GFX12.5: // CU$ has two ports. To ensure operations are visible at the workgroup // level, we need to ensure all operations in this port have completed // so the other SIMDs in the WG can see them. There is no ordering // guarantee between the ports. 
- if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) { + if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts() || + isReleaseOrStronger(Order)) { if ((Op & SIMemOp::LOAD) != SIMemOp::NONE) LOADCnt |= true; if ((Op & SIMemOp::STORE) != SIMemOp::NONE) @@ -3017,11 +2999,6 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) { MI = II->getIterator(); } - if (ST.getInstrInfo()->isBarrierStart(MI->getOpcode())) { - Changed |= CC->insertBarrierStart(MI); - continue; - } - if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic)) continue; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index 002c03aa7967d..e86f7473363f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -551,7 +551,9 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct + ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_release @@ -562,6 +564,8 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: S_WAITCNT_soft 1015 + ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") release @@ -587,7 +591,9 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct + ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel @@ -599,6 +605,8 @@ define amdgpu_kernel void 
@workgroup_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: S_WAITCNT_soft 1015 + ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") acq_rel @@ -624,7 +632,9 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct + ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst @@ -636,6 +646,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: S_WAITCNT_soft 1015 + ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") seq_cst @@ -1305,8 +1317,9 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_release ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_soft 49279 + ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct + ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_release @@ -1317,7 +1330,8 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX11CU-LABEL: name: workgroup_release ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_soft 64519 + ; GFX11CU-NEXT: S_WAITCNT_soft 7 + ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") release @@ -1345,8 +1359,9 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_acq_rel ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_soft 49279 + ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct + ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef 
$sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_acq_rel @@ -1358,7 +1373,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_acq_rel ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_soft 64519 + ; GFX11CU-NEXT: S_WAITCNT_soft 7 + ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") acq_rel @@ -1386,8 +1402,9 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_seq_cst ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_soft 49279 + ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct + ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_seq_cst @@ -1399,7 +1416,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_seq_cst ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_soft 64519 + ; GFX11CU-NEXT: S_WAITCNT_soft 7 + ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-workgroup-release.ll b/llvm/test/CodeGen/AMDGPU/lds-dma-workgroup-release.ll index b91963f08681c..d23509b5aa812 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-dma-workgroup-release.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-dma-workgroup-release.ll @@ -150,7 +150,6 @@ define amdgpu_kernel void @barrier_release(<4 x i32> inreg %rsrc, ; GFX10CU-NEXT: buffer_load_dword v0, s[8:11], 0 offen lds ; GFX10CU-NEXT: v_mov_b32_e32 v0, s13 ; GFX10CU-NEXT: s_waitcnt vmcnt(0) -; GFX10CU-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10CU-NEXT: s_barrier ; GFX10CU-NEXT: ds_read_b32 v0, v0 ; GFX10CU-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll index 516c3946f63dc..282a7ae7ea2fd 100644 --- 
a/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll @@ -15,7 +15,6 @@ define amdgpu_kernel void @test_s_barrier() { ; ; GFX10-CU-LABEL: test_s_barrier: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-CU-NEXT: s_barrier ; GFX10-CU-NEXT: s_endpgm ; @@ -26,7 +25,6 @@ define amdgpu_kernel void @test_s_barrier() { ; ; GFX11-CU-LABEL: test_s_barrier: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt_depctr 0xffe3 ; GFX11-CU-NEXT: s_barrier ; GFX11-CU-NEXT: s_endpgm ; @@ -38,7 +36,6 @@ define amdgpu_kernel void @test_s_barrier() { ; ; GFX12-CU-LABEL: test_s_barrier: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_wait_alu 0xffe3 ; GFX12-CU-NEXT: s_barrier_signal -1 ; GFX12-CU-NEXT: s_barrier_wait -1 ; GFX12-CU-NEXT: s_endpgm @@ -63,8 +60,8 @@ define amdgpu_kernel void @test_s_barrier_workgroup_fence() { ; ; GFX10-CU-LABEL: test_s_barrier_workgroup_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_barrier ; GFX10-CU-NEXT: s_endpgm ; @@ -77,8 +74,8 @@ define amdgpu_kernel void @test_s_barrier_workgroup_fence() { ; ; GFX11-CU-LABEL: test_s_barrier_workgroup_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_depctr 0xffe3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_barrier ; GFX11-CU-NEXT: s_endpgm ; @@ -94,8 +91,10 @@ define amdgpu_kernel void @test_s_barrier_workgroup_fence() { ; ; GFX12-CU-LABEL: test_s_barrier_workgroup_fence: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_wait_dscnt 0x0 -; GFX12-CU-NEXT: s_wait_alu 0xffe3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 
0x0 ; GFX12-CU-NEXT: s_barrier_signal -1 ; GFX12-CU-NEXT: s_barrier_wait -1 ; GFX12-CU-NEXT: s_endpgm @@ -125,7 +124,6 @@ define amdgpu_kernel void @test_s_barrier_agent_fence() { ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-CU-NEXT: s_barrier ; GFX10-CU-NEXT: s_endpgm ; @@ -140,7 +138,6 @@ define amdgpu_kernel void @test_s_barrier_agent_fence() { ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: s_waitcnt_depctr 0xffe3 ; GFX11-CU-NEXT: s_barrier ; GFX11-CU-NEXT: s_endpgm ; @@ -160,7 +157,6 @@ define amdgpu_kernel void @test_s_barrier_agent_fence() { ; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-CU-NEXT: s_wait_alu 0xffe3 ; GFX12-CU-NEXT: s_barrier_signal -1 ; GFX12-CU-NEXT: s_barrier_wait -1 ; GFX12-CU-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll index 6a76f4307dcad..7efbff9c637c5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll @@ -107,6 +107,8 @@ define amdgpu_kernel void @workgroup_release_fence() { ; ; GFX10-CU-LABEL: workgroup_release_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_release_fence: @@ -139,6 +141,8 @@ define amdgpu_kernel void @workgroup_release_fence() { ; ; GFX11-CU-LABEL: workgroup_release_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_release_fence: @@ -151,6 +155,10 @@ define amdgpu_kernel void 
@workgroup_release_fence() { ; ; GFX12-CU-LABEL: workgroup_release_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_release_fence: @@ -181,6 +189,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX10-CU-LABEL: workgroup_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence: @@ -216,6 +226,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX11-CU-LABEL: workgroup_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_acq_rel_fence: @@ -229,6 +241,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX12-CU-LABEL: workgroup_acq_rel_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_acq_rel_fence: @@ -259,6 +275,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; GFX10-CU-LABEL: workgroup_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence: @@ -294,6 +312,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; GFX11-CU-LABEL: workgroup_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_seq_cst_fence: @@ -307,6 +327,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; 
GFX12-CU-LABEL: workgroup_seq_cst_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_seq_cst_fence: @@ -412,6 +436,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX10-CU-LABEL: workgroup_one_as_release_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence: @@ -444,6 +470,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX11-CU-LABEL: workgroup_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_one_as_release_fence: @@ -456,6 +484,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX12-CU-LABEL: workgroup_one_as_release_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_one_as_release_fence: @@ -486,6 +518,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence: @@ -521,6 +555,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence: @@ -534,6 
+570,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_one_as_acq_rel_fence: @@ -564,6 +604,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence: @@ -599,6 +641,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence: @@ -612,6 +656,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_one_as_seq_cst_fence: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll index d288bfc6a09db..1cca64ad6d2b4 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -1093,7 +1093,8 @@ define amdgpu_kernel void @workgroup_release_fence() { ; ; GFX10-CU-LABEL: workgroup_release_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: 
s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_release_fence: @@ -1129,7 +1130,8 @@ define amdgpu_kernel void @workgroup_release_fence() { ; ; GFX11-CU-LABEL: workgroup_release_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_release_fence: @@ -1142,7 +1144,10 @@ define amdgpu_kernel void @workgroup_release_fence() { ; ; GFX12-CU-LABEL: workgroup_release_fence: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_release_fence: @@ -1175,7 +1180,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX10-CU-LABEL: workgroup_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence: @@ -1214,7 +1220,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX11-CU-LABEL: workgroup_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_acq_rel_fence: @@ -1228,7 +1235,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX12-CU-LABEL: workgroup_acq_rel_fence: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_acq_rel_fence: @@ -1261,7 +1271,8 @@ define 
amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; GFX10-CU-LABEL: workgroup_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence: @@ -1300,7 +1311,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; GFX11-CU-LABEL: workgroup_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_seq_cst_fence: @@ -1314,7 +1326,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; GFX12-CU-LABEL: workgroup_seq_cst_fence: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_seq_cst_fence: @@ -1420,6 +1435,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX10-CU-LABEL: workgroup_one_as_release_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence: @@ -1452,6 +1469,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX11-CU-LABEL: workgroup_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_one_as_release_fence: @@ -1464,6 +1483,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX12-CU-LABEL: workgroup_one_as_release_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 
+; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_one_as_release_fence: @@ -1494,6 +1517,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence: @@ -1529,6 +1554,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence: @@ -1542,6 +1569,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_one_as_acq_rel_fence: @@ -1572,6 +1603,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence: @@ -1607,6 +1640,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence: @@ -1620,6 +1655,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence: ; 
GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: workgroup_one_as_seq_cst_fence: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll index d277441d422d9..2afa5779c7522 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -1072,7 +1072,8 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1109,7 +1110,8 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -1136,7 +1138,10 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll index 
3826953a8e2ab..d384aec2a2b19 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -656,12 +656,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -765,12 +765,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -800,12 +800,14 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: 
v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -1193,7 +1195,8 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1278,7 +1281,8 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -1305,7 +1309,10 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -1372,7 +1379,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1457,7 +1465,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -1484,7 +1493,10 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -1891,7 +1903,8 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1976,7 +1989,8 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -2003,7 +2017,10 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -2074,9 +2091,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw( ; 
GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_atomicrmw: @@ -2170,9 +2189,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_acq_rel_atomicrmw: @@ -2200,9 +2221,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_acq_rel_atomicrmw: @@ -2273,9 +2297,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_atomicrmw: @@ -2369,9 +2395,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_seq_cst_atomicrmw: @@ -2399,9 +2427,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_seq_cst_atomicrmw: @@ -2697,12 +2728,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -2813,12 +2844,12 @@ define amdgpu_kernel void 
@flat_workgroup_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -2850,12 +2881,14 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -2935,12 +2968,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm 
; @@ -3051,12 +3084,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -3088,12 +3121,14 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -3731,7 +3766,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -3854,7 +3890,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: 
v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -3889,7 +3926,10 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -4007,9 +4047,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: @@ -4141,9 +4183,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: @@ -4179,9 +4223,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 
v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: @@ -4299,9 +4346,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: @@ -4433,9 +4482,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: @@ -4471,9 +4522,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: @@ -5137,9 +5191,11 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_cmpxchg: @@ -5271,9 +5327,11 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_release_acquire_cmpxchg: @@ -5309,9 +5367,12 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: 
flat_workgroup_release_acquire_cmpxchg: @@ -5429,9 +5490,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: @@ -5563,9 +5626,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: @@ -5601,9 +5666,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: @@ -5721,9 +5789,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 
-; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: @@ -5855,9 +5925,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: @@ -5893,9 +5965,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: @@ -6013,9 +6088,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: @@ -6147,9 +6224,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: @@ -6185,9 +6264,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: @@ -6923,7 +7005,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 @@ -7070,7 +7153,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 
-; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -7113,7 +7197,10 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -7245,12 +7332,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -7399,12 +7486,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -7444,12 +7531,14 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -7577,12 +7666,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -7731,12 +7820,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -7776,12 +7865,14 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -8535,12 +8626,12 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -8689,12 +8780,12 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -8734,12 +8825,14 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -8867,12 +8960,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -9021,12 +9114,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: 
v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -9066,12 +9159,14 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -9199,12 +9294,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; 
@@ -9353,12 +9448,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -9398,12 +9493,14 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -9531,7 +9628,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9685,7 +9783,8 @@ define amdgpu_kernel void 
@flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9730,7 +9829,10 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9863,7 +9965,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -10017,7 +10120,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10062,7 +10166,10 @@ 
define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10195,12 +10302,12 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -10349,12 +10456,12 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -10394,12 +10501,14 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; 
GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -10527,12 +10636,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -10681,12 +10790,12 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; 
GFX11-CU-NEXT: s_endpgm ; @@ -10726,12 +10835,14 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -10859,12 +10970,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -11013,12 +11124,12 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; 
GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -11058,12 +11169,14 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -11732,10 +11845,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -11834,10 +11950,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -11868,10 +11987,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -12258,6 +12382,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -12339,6 +12465,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -12365,6 +12493,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -12430,6 +12562,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -12511,6 +12645,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -12537,6 +12673,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -12933,6 +13073,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -13014,6 +13156,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -13040,6 +13184,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; 
GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -13107,7 +13255,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: @@ -13194,7 +13345,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: @@ -13222,7 +13376,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: @@ -13290,7 +13449,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 +; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: @@ -13377,7 +13539,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: @@ -13405,7 +13570,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: @@ -13696,10 +13866,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -13805,10 +13978,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-CU-NEXT: s_waitcnt 
vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -13841,10 +14017,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -13923,10 +14104,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -14032,10 +14216,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -14068,10 +14255,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -14699,6 +14891,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -14818,6 +15012,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -14852,6 +15048,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, 
s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -14966,7 +15166,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -15091,7 +15294,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -15127,7 +15333,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -15242,7 +15453,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -15367,7 +15581,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -15403,7 +15620,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -16046,7 +16268,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: @@ -16171,7 +16396,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: @@ -16207,7 +16435,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: @@ -16322,7 +16555,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -16447,7 +16683,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -16483,7 +16722,12 @@ define amdgpu_kernel void 
@flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -16598,7 +16842,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -16723,7 +16970,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -16759,7 +17009,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; 
GFX1250-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -16874,6 +17129,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -16999,6 +17256,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -17035,6 +17294,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -17150,6 +17413,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -17275,6 +17540,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], 
v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -17311,6 +17578,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -17426,7 +17697,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: @@ -17551,7 +17825,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: @@ -17587,7 +17864,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; 
GFX1250-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: @@ -17702,7 +17984,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: @@ -17827,7 +18112,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: @@ -17863,7 +18151,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: @@ -17978,7 +18271,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -18103,7 +18399,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -18139,7 +18438,12 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -18870,6 +19174,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 @@ -19013,6 +19319,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -19055,6 +19363,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -19185,10 +19497,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -19332,10 +19647,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -19376,10 +19694,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -19506,10 +19829,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -19653,10 +19979,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -19697,10 +20026,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -20445,10 +20779,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -20592,10 +20929,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -20636,10 +20976,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -20766,10 +21111,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -20913,10 +21261,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -20957,10 +21308,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -21087,10 +21443,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -21234,10 +21593,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -21278,10 +21640,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -21408,6 +21775,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 @@ -21555,6 +21924,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -21599,6 +21970,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 @@ 
-21729,6 +22104,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 @@ -21876,6 +22253,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -21920,6 +22299,10 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -22050,10 +22433,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: 
flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -22197,10 +22583,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -22241,10 +22630,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -22371,10 +22765,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: 
flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -22518,10 +22915,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -22562,10 +22962,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; @@ -22692,10 +23097,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: 
flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -22839,10 +23247,13 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ; @@ -22883,10 +23294,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-CU-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll index 3bf5ed8b2397f..c326edfdd490e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll @@ -959,7 +959,8 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] ; GFX10-CU-NEXT: s_endpgm ; @@ -1001,7 +1002,8 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -1026,7 +1028,10 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll index b755c5d615dda..868b438151558 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -667,7 +667,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -763,7 +764,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -790,7 +792,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 @@ -1204,7 +1209,8 @@ define amdgpu_kernel void @global_workgroup_release_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] ; GFX10-CU-NEXT: s_endpgm ; @@ -1290,7 +1296,8 @@ define amdgpu_kernel void @global_workgroup_release_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -1315,7 +1322,10 @@ define amdgpu_kernel void @global_workgroup_release_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; @@ -1391,7 +1401,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] ; GFX10-CU-NEXT: s_endpgm ; @@ -1477,7 +1488,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -1502,7 +1514,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; @@ -1918,7 +1933,8 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX10-CU-NEXT: s_endpgm ; @@ -2003,7 +2019,8 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -2028,7 +2045,10 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; @@ -2105,8 +2125,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_atomicrmw: @@ -2196,8 +2218,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_acq_rel_atomicrmw: @@ -2223,8 +2247,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 
0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_acq_rel_atomicrmw: @@ -2301,8 +2329,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_atomicrmw: @@ -2392,8 +2422,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_seq_cst_atomicrmw: @@ -2419,8 +2451,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_seq_cst_atomicrmw: @@ -2705,7 +2741,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; 
GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -2807,7 +2844,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2837,7 +2875,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2926,7 +2967,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -3028,7 +3070,8 @@ define amdgpu_kernel void 
@global_workgroup_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3058,7 +3101,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3644,7 +3690,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3758,7 +3805,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -3791,7 +3839,10 @@ define 
amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -3900,8 +3951,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: @@ -4020,8 +4073,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: @@ -4055,8 +4110,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: 
s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: @@ -4165,8 +4224,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: @@ -4285,8 +4346,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: @@ -4320,8 +4383,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 
v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: @@ -4920,8 +4987,10 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_cmpxchg: @@ -5040,8 +5109,10 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_release_acquire_cmpxchg: @@ -5075,8 +5146,12 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_release_acquire_cmpxchg: @@ -5185,8 +5260,10 @@ define 
amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: @@ -5305,8 +5382,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: @@ -5340,8 +5419,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: @@ -5450,8 +5533,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; 
GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: @@ -5570,8 +5655,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: @@ -5605,8 +5692,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: @@ -5715,7 +5806,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: 
global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -5835,7 +5927,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -5870,7 +5963,10 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -5980,7 +6076,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -6100,7 +6197,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, 
v[1:2], s[0:1] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -6135,7 +6233,10 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -6245,8 +6346,10 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_cmpxchg: @@ -6365,8 +6468,10 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_release_seq_cst_cmpxchg: @@ -6400,8 +6505,12 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: 
v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_release_seq_cst_cmpxchg: @@ -6510,8 +6619,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: @@ -6630,8 +6741,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: @@ -6665,8 +6778,12 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: @@ -6775,8 +6892,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: @@ -6895,8 +7014,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: @@ -6930,8 +7051,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: 
global_workgroup_seq_cst_seq_cst_cmpxchg: @@ -7588,7 +7713,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -7717,7 +7843,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7754,7 +7881,10 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7877,7 +8007,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: 
v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -8009,7 +8140,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8047,7 +8179,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8170,7 +8305,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; 
GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -8302,7 +8438,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8340,7 +8477,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9009,7 +9149,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -9141,7 +9282,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: 
v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9179,7 +9321,10 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9302,7 +9447,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -9434,7 +9580,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; 
GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9472,7 +9619,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9595,7 +9745,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -9727,7 +9878,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9765,7 +9917,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: 
v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9888,7 +10043,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -10020,7 +10176,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10058,7 +10215,10 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], 
s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10181,7 +10341,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -10313,7 +10474,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10351,7 +10513,10 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10474,7 +10639,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; 
kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -10606,7 +10772,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10644,7 +10811,10 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10767,7 +10937,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, 
v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -10899,7 +11070,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10937,7 +11109,10 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11060,7 +11235,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -11192,7 +11368,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: 
def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11230,7 +11407,10 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11914,7 +12094,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -12009,7 +12190,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 
@@ -12036,6 +12218,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 @@ -12447,6 +12633,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] ; GFX10-CU-NEXT: s_endpgm ; @@ -12529,6 +12717,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -12553,6 +12743,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; @@ -12626,6 +12820,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] ; GFX10-CU-NEXT: 
s_endpgm ; @@ -12708,6 +12904,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -12732,6 +12930,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; @@ -13145,6 +13347,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX10-CU-NEXT: s_endpgm ; @@ -13226,6 +13430,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_endpgm ; @@ -13250,6 +13456,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm ; @@ 
-13324,7 +13534,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: @@ -13411,7 +13624,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: @@ -13437,7 +13653,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: @@ -13512,7 +13733,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: 
@@ -13599,7 +13823,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: @@ -13625,7 +13852,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: @@ -13908,6 +14140,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -14006,6 +14240,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14035,6 +14271,10 @@ define amdgpu_kernel void 
@global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14121,6 +14361,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -14219,6 +14461,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14248,6 +14492,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14831,6 +15079,8 @@ define amdgpu_kernel void 
@global_workgroup_one_as_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -14941,6 +15191,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -14973,6 +15225,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -15079,7 +15335,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -15195,7 +15454,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; 
GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -15229,7 +15491,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -15336,7 +15603,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -15452,7 +15722,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -15486,7 +15759,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -16083,7 +16361,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: @@ -16199,7 +16480,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: @@ -16233,7 +16517,12 @@ define amdgpu_kernel void 
@global_workgroup_one_as_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: @@ -16340,7 +16629,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -16456,7 +16748,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -16490,7 +16785,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; 
GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -16597,7 +16897,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -16713,7 +17016,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -16747,7 +17053,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -16854,6 
+17165,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -16970,6 +17283,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -17004,6 +17319,10 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -17111,6 +17430,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -17227,6 +17548,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def 
$vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -17261,6 +17584,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -17368,7 +17695,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: @@ -17484,7 +17814,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: @@ -17518,7 +17851,12 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; 
GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: @@ -17625,7 +17963,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: @@ -17741,7 +18082,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: @@ -17775,7 +18119,12 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: @@ -17882,7 +18231,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -17998,7 +18350,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm ; ; GFX12-WGP-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -18032,7 +18387,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -18687,6 +19047,8 @@ define amdgpu_kernel void 
@global_workgroup_one_as_release_monotonic_ret_cmpxchg ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -18812,6 +19174,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -18848,6 +19212,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -18968,6 +19336,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt 
vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -19096,6 +19466,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19133,6 +19505,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19253,6 +19629,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -19381,6 +19759,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt 
null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19418,6 +19798,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20084,6 +20468,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -20212,6 +20598,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20249,6 +20637,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed 
$exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20369,6 +20761,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -20497,6 +20891,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20534,6 +20930,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20654,6 +21054,8 
@@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -20782,6 +21184,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20819,6 +21223,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20939,6 +21347,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -21067,6 +21477,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21104,6 +21516,10 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21224,6 +21640,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -21352,6 +21770,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; 
GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21389,6 +21809,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21509,6 +21933,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -21637,6 +22063,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21674,6 +22102,10 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed 
$vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21794,6 +22226,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -21922,6 +22356,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21959,6 +22395,10 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, 
v1, s[0:1] @@ -22079,6 +22519,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 ; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -22207,6 +22649,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22244,6 +22688,10 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll index 986b48b60a443..712109d2f67f9 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -622,7 +622,8 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; GFX10-CU-NEXT: 
s_load_dword s4, s[8:9], 0x4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_read_b32 v1, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -719,7 +720,8 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_load_b32 v1, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -749,7 +751,10 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_load_b32 v1, v0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -1121,7 +1126,8 @@ define amdgpu_kernel void @local_agent_release_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1200,7 +1206,8 @@ define amdgpu_kernel void @local_agent_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: 
ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1225,7 +1232,10 @@ define amdgpu_kernel void @local_agent_release_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1291,7 +1301,8 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1370,7 +1381,8 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1395,7 +1407,10 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1778,7 +1793,8 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1857,7 +1873,8 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1882,7 +1899,10 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1952,7 +1972,8 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2039,7 +2060,8 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2067,7 +2089,10 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 
s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2139,7 +2164,8 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2226,7 +2252,8 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2254,7 +2281,10 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2535,7 +2565,8 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2639,7 +2670,8 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2671,7 +2703,10 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2756,7 +2791,8 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2860,7 +2896,8 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2892,7 +2929,10 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -3348,7 +3388,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3441,7 +3482,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -3470,7 +3512,10 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -3551,7 +3596,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3652,7 +3698,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3684,7 +3731,10 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -3767,7 +3817,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3868,7 +3919,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3900,7 +3952,10 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4375,7 +4430,8 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4476,7 +4532,8 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4508,7 +4565,10 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: 
ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4591,7 +4651,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4692,7 +4753,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4724,7 +4786,10 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4807,7 +4872,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4908,7 +4974,8 @@ define amdgpu_kernel void 
@local_agent_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4940,7 +5007,10 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5023,7 +5093,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5124,7 +5195,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5156,7 +5228,10 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: 
v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5239,7 +5314,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5340,7 +5416,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5372,7 +5449,10 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5455,7 +5535,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5556,7 +5637,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5588,7 +5670,10 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5671,7 +5756,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5772,7 +5858,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5804,7 +5891,10 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5887,7 +5977,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5988,7 +6079,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -6020,7 +6112,10 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -6567,7 +6662,8 @@ define amdgpu_kernel void 
@local_agent_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6682,7 +6778,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6717,7 +6814,10 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 @@ -6814,7 +6914,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -6932,7 +7033,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -6968,7 +7070,10 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7065,7 +7170,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7183,7 +7289,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7219,7 +7326,10 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; 
GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7778,7 +7888,8 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7896,7 +8007,8 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7932,7 +8044,10 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8029,7 +8144,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8147,7 +8263,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8183,7 +8300,10 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8280,7 +8400,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8398,7 +8519,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: 
v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8434,7 +8556,10 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8531,7 +8656,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8649,7 +8775,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8685,7 +8812,10 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: 
s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8782,7 +8912,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8900,7 +9031,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8936,7 +9068,10 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9033,7 +9168,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9151,7 +9287,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9187,7 +9324,10 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9284,7 +9424,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9402,7 +9543,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9438,7 +9580,10 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9535,7 +9680,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9653,7 +9799,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9689,7 +9836,10 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 
+; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-cluster.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-cluster.ll index 8926893c68dbc..6d1e4e6a96119 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-cluster.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-cluster.ll @@ -622,7 +622,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_load( ; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_read_b32 v1, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -719,7 +720,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_load( ; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_load_b32 v1, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -749,7 +751,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_load( ; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_load_b32 v1, v0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -1121,7 +1126,8 @@ define amdgpu_kernel void @local_cluster_release_store( ; GFX10-CU-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1200,7 +1206,8 @@ define amdgpu_kernel void @local_cluster_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1225,7 +1232,10 @@ define amdgpu_kernel void @local_cluster_release_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1291,7 +1301,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1370,7 +1381,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1395,7 +1407,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_store( ; GFX12-CU-NEXT: 
s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1778,7 +1793,8 @@ define amdgpu_kernel void @local_cluster_release_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1857,7 +1873,8 @@ define amdgpu_kernel void @local_cluster_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1882,7 +1899,10 @@ define amdgpu_kernel void @local_cluster_release_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1952,7 +1972,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2039,7 +2060,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2067,7 +2089,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2139,7 +2164,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2226,7 +2252,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2254,7 +2281,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2535,7 +2565,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2639,7 +2670,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2671,7 +2703,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2756,7 +2791,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2860,7 +2896,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2892,7 +2929,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -3348,7 +3388,8 @@ define amdgpu_kernel void @local_cluster_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3441,7 +3482,8 @@ define amdgpu_kernel void @local_cluster_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 
v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -3470,7 +3512,10 @@ define amdgpu_kernel void @local_cluster_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -3551,7 +3596,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3652,7 +3698,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3684,7 +3731,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -3767,7 +3817,8 @@ define amdgpu_kernel void 
@local_cluster_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3868,7 +3919,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3900,7 +3952,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4375,7 +4430,8 @@ define amdgpu_kernel void @local_cluster_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4476,7 +4532,8 @@ define amdgpu_kernel void @local_cluster_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: 
v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4508,7 +4565,10 @@ define amdgpu_kernel void @local_cluster_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4591,7 +4651,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4692,7 +4753,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4724,7 +4786,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; 
GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4807,7 +4872,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4908,7 +4974,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4940,7 +5007,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5023,7 +5093,8 @@ define amdgpu_kernel void @local_cluster_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5124,7 +5195,8 @@ define amdgpu_kernel void @local_cluster_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5156,7 +5228,10 @@ define amdgpu_kernel void @local_cluster_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5239,7 +5314,8 @@ define amdgpu_kernel void @local_cluster_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5340,7 +5416,8 @@ define amdgpu_kernel void @local_cluster_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5372,7 +5449,10 @@ define amdgpu_kernel void 
@local_cluster_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5455,7 +5535,8 @@ define amdgpu_kernel void @local_cluster_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5556,7 +5637,8 @@ define amdgpu_kernel void @local_cluster_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5588,7 +5670,10 @@ define amdgpu_kernel void @local_cluster_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5671,7 +5756,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; 
GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5772,7 +5858,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5804,7 +5891,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5887,7 +5977,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5988,7 +6079,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -6020,7 +6112,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -6567,7 +6662,8 @@ define amdgpu_kernel void @local_cluster_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6682,7 +6778,8 @@ define amdgpu_kernel void @local_cluster_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6717,7 +6814,10 @@ define amdgpu_kernel void @local_cluster_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 @@ -6814,7 +6914,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -6932,7 +7033,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -6968,7 +7070,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7065,7 +7170,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 
0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7183,7 +7289,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7219,7 +7326,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7778,7 +7888,8 @@ define amdgpu_kernel void @local_cluster_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7896,7 +8007,8 @@ define amdgpu_kernel void @local_cluster_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, 
v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7932,7 +8044,10 @@ define amdgpu_kernel void @local_cluster_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8029,7 +8144,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8147,7 +8263,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8183,7 +8300,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: 
ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8280,7 +8400,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8398,7 +8519,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8434,7 +8556,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8531,7 +8656,8 @@ define amdgpu_kernel void @local_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8649,7 +8775,8 @@ define amdgpu_kernel void @local_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8685,7 +8812,10 @@ define amdgpu_kernel void @local_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8782,7 +8912,8 @@ define amdgpu_kernel void @local_cluster_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8900,7 +9031,8 @@ define amdgpu_kernel void @local_cluster_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8936,7 +9068,10 @@ define amdgpu_kernel void @local_cluster_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9033,7 +9168,8 @@ define amdgpu_kernel void @local_cluster_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9151,7 +9287,8 @@ define amdgpu_kernel void @local_cluster_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9187,7 +9324,10 @@ define amdgpu_kernel void @local_cluster_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: 
s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9284,7 +9424,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9402,7 +9543,8 @@ define amdgpu_kernel void @local_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9438,7 +9580,10 @@ define amdgpu_kernel void @local_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9535,7 +9680,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 
v0, s4 @@ -9653,7 +9799,8 @@ define amdgpu_kernel void @local_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9689,7 +9836,10 @@ define amdgpu_kernel void @local_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll index 81bbe0a78203e..577d2ca9514bb 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -622,7 +622,8 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_read_b32 v1, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -719,7 +720,8 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_load_b32 v1, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -749,7 +751,10 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_load_b32 v1, v0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -1121,7 +1126,8 @@ define amdgpu_kernel void @local_system_release_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1200,7 +1206,8 @@ define amdgpu_kernel void @local_system_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1225,7 +1232,10 @@ define amdgpu_kernel void @local_system_release_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1291,7 +1301,8 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1370,7 +1381,8 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1395,7 +1407,10 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1778,7 +1793,8 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1857,7 +1873,8 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1882,7 +1899,10 @@ define amdgpu_kernel void 
@local_system_release_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1952,7 +1972,8 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2039,7 +2060,8 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2067,7 +2089,10 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2139,7 +2164,8 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2226,7 +2252,8 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2254,7 +2281,10 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2535,7 +2565,8 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2639,7 +2670,8 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2671,7 +2703,10 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2756,7 +2791,8 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2860,7 +2896,8 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2892,7 +2929,10 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ 
-3348,7 +3388,8 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3441,7 +3482,8 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -3470,7 +3512,10 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -3551,7 +3596,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3652,7 +3698,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3684,7 +3731,10 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -3767,7 +3817,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3868,7 +3919,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3900,7 +3952,10 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4375,7 +4430,8 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4476,7 +4532,8 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4508,7 +4565,10 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4591,7 +4651,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ 
-4692,7 +4753,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4724,7 +4786,10 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4807,7 +4872,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4908,7 +4974,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4940,7 +5007,10 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: 
v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5023,7 +5093,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5124,7 +5195,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5156,7 +5228,10 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5239,7 +5314,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5340,7 +5416,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5372,7 +5449,10 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5455,7 +5535,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5556,7 +5637,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 
v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5588,7 +5670,10 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5671,7 +5756,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5772,7 +5858,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5804,7 +5891,10 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ 
-5887,7 +5977,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5988,7 +6079,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -6020,7 +6112,10 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -6567,7 +6662,8 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6682,7 +6778,8 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 
v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6717,7 +6814,10 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 @@ -6814,7 +6914,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -6932,7 +7033,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -6968,7 +7070,10 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 
v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7065,7 +7170,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7183,7 +7289,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7219,7 +7326,10 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7778,7 +7888,8 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7896,7 +8007,8 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7932,7 +8044,10 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8029,7 +8144,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8147,7 +8263,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 
v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8183,7 +8300,10 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8280,7 +8400,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8398,7 +8519,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8434,7 +8556,10 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; 
GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8531,7 +8656,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8649,7 +8775,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8685,7 +8812,10 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8782,7 +8912,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8900,7 +9031,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8936,7 +9068,10 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9033,7 +9168,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9151,7 +9287,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9187,7 +9324,10 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9284,7 +9424,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9402,7 +9543,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9438,7 +9580,10 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; 
GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9535,7 +9680,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9653,7 +9799,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9689,7 +9836,10 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll index 980141a87ecf3..d686e7a2d5b4c 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ 
-819,7 +819,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -854,7 +855,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -879,7 +881,10 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll index 6a233a2c9013b..ab4d7834b23a5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -622,7 +622,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_read_b32 v1, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 
@@ -719,7 +720,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_load_b32 v1, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -749,7 +751,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_load_b32 v1, v0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -1121,7 +1126,8 @@ define amdgpu_kernel void @local_workgroup_release_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1200,7 +1206,8 @@ define amdgpu_kernel void @local_workgroup_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1225,7 +1232,10 @@ define amdgpu_kernel void @local_workgroup_release_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: 
s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1291,7 +1301,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1370,7 +1381,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1395,7 +1407,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1778,7 +1793,8 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1857,7 +1873,8 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -1882,7 +1899,10 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_endpgm ; @@ -1952,7 +1972,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2039,7 +2060,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2067,7 +2089,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; 
GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2139,7 +2164,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2226,7 +2252,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2254,7 +2281,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -2535,7 +2565,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2639,7 +2670,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2671,7 +2703,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2756,7 +2791,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -2860,7 +2896,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2892,7 +2929,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX12-CU-NEXT: 
s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -3348,7 +3388,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3441,7 +3482,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ; @@ -3470,7 +3512,10 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_endpgm ; @@ -3551,7 +3596,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3652,7 +3698,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3684,7 +3731,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -3767,7 +3817,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3868,7 +3919,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3900,7 +3952,10 @@ define amdgpu_kernel void 
@local_workgroup_seq_cst_monotonic_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4375,7 +4430,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4476,7 +4532,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4508,7 +4565,10 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4591,7 +4651,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 
v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4692,7 +4753,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4724,7 +4786,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -4807,7 +4872,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4908,7 +4974,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4940,7 +5007,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5023,7 +5093,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5124,7 +5195,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5156,7 +5228,10 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5239,7 +5314,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5340,7 +5416,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5372,7 +5449,10 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5455,7 +5535,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: 
s_endpgm @@ -5556,7 +5637,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5588,7 +5670,10 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5671,7 +5756,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5772,7 +5858,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -5804,7 +5891,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 
v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -5887,7 +5977,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -5988,7 +6079,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -6020,7 +6112,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: s_endpgm @@ -6567,7 +6662,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: 
v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6682,7 +6778,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6717,7 +6814,10 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 @@ -6814,7 +6914,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -6932,7 +7033,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -6968,7 +7070,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7065,7 +7170,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7183,7 +7289,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7219,7 +7326,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 
0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7778,7 +7888,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -7896,7 +8007,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -7932,7 +8044,10 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8029,7 +8144,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8147,7 +8263,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8183,7 +8300,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8280,7 +8400,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8398,7 +8519,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8434,7 +8556,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8531,7 +8656,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8649,7 +8775,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8685,7 +8812,10 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8782,7 +8912,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -8900,7 +9031,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8936,7 +9068,10 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9033,7 +9168,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9151,7 +9287,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9187,7 +9324,10 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9284,7 +9424,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9402,7 +9543,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9438,7 +9580,10 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9535,7 +9680,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 @@ -9653,7 +9799,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9689,7 +9836,10 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; 
GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 From ec26f219acce77fb9b3d52abd31b0e639e788514 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Oct 2025 09:27:07 +0200 Subject: [PATCH 04/99] [InstSimplify] Support ptrtoaddr in simplifyGEPInst() (#164262) This adds support for ptrtoaddr in the `ptradd p, ptrtoaddr(p2) - ptrtoaddr(p) -> p2` fold. This fold requires that p and p2 have the same underlying object (otherwise the provenance may not be the same). The argument I would like to make here is that because the underlying objects are the same (and the pointers in the same address space), the non-address bits of the pointer must be the same. Looking at some specific cases of underlying object relationship: * phi/select: Trivially true. * getelementptr: Only modifies address bits, non-address bits must remain the same. * addrspacecast round-trip cast: Must preserve all bits because we optimize such round-trip casts away. * non-interposable global alias: I'm a bit unsure about this one, but I guess the alias and the aliasee must have the same non-address bits? * various intrinsics like launder.invariant.group, ptrmask. I think these all either preserve all pointer bits (like the invariant.group ones) or at least the non-address bits (like ptrmask). There are some interesting cases like amdgcn.make.buffer.rsrc, but those are cross address-space. ----- There is a second `gep (gep p, C), (sub 0, ptrtoint(p)) -> C` transform in this function, which I am not extending to handle ptrtoaddr, adding negative tests instead. This transform is overall dubious for provenance reasons, but especially dubious with ptrtoaddr, as then we don't have the guarantee that provenance of `p` has been exposed. 
--- llvm/lib/Analysis/InstructionSimplify.cpp | 19 +-- .../test/Transforms/InstSimplify/ptrtoaddr.ll | 109 ++++++++++++++++++ 2 files changed, 119 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index dc813f6ea9728..8da51d039f197 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5106,32 +5106,33 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr, return Ptr; // The following transforms are only safe if the ptrtoint cast - // doesn't truncate the pointers. - if (Indices[0]->getType()->getScalarSizeInBits() == - Q.DL.getPointerSizeInBits(AS)) { + // doesn't truncate the address of the pointers. The non-address bits + // must be the same, as the underlying objects are the same. + if (Indices[0]->getType()->getScalarSizeInBits() >= + Q.DL.getAddressSizeInBits(AS)) { auto CanSimplify = [GEPTy, &P, Ptr]() -> bool { return P->getType() == GEPTy && getUnderlyingObject(P) == getUnderlyingObject(Ptr); }; // getelementptr V, (sub P, V) -> P if P points to a type of size 1. if (TyAllocSize == 1 && - match(Indices[0], - m_Sub(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Specific(Ptr)))) && + match(Indices[0], m_Sub(m_PtrToIntOrAddr(m_Value(P)), + m_PtrToIntOrAddr(m_Specific(Ptr)))) && CanSimplify()) return P; // getelementptr V, (ashr (sub P, V), C) -> P if P points to a type of // size 1 << C. - if (match(Indices[0], m_AShr(m_Sub(m_PtrToInt(m_Value(P)), - m_PtrToInt(m_Specific(Ptr))), + if (match(Indices[0], m_AShr(m_Sub(m_PtrToIntOrAddr(m_Value(P)), + m_PtrToIntOrAddr(m_Specific(Ptr))), m_ConstantInt(C))) && TyAllocSize == 1ULL << C && CanSimplify()) return P; // getelementptr V, (sdiv (sub P, V), C) -> P if P points to a type of // size C. 
- if (match(Indices[0], m_SDiv(m_Sub(m_PtrToInt(m_Value(P)), - m_PtrToInt(m_Specific(Ptr))), + if (match(Indices[0], m_SDiv(m_Sub(m_PtrToIntOrAddr(m_Value(P)), + m_PtrToIntOrAddr(m_Specific(Ptr))), m_SpecificInt(TyAllocSize))) && CanSimplify()) return P; diff --git a/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll b/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll index 03a2d4b57f614..d06b520931b92 100644 --- a/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll @@ -207,3 +207,112 @@ define i32 @ptrtoaddr_of_ptradd_of_sub_addrsize(i32 %x, ptr addrspace(1) %p) { %ptradd.addr = ptrtoaddr ptr addrspace(1) %ptradd to i32 ret i32 %ptradd.addr } + +define ptr @gep_of_sub_ptrtoaddr_unrelated_pointers(ptr %p, ptr %p2, i64 %x) { +; CHECK-LABEL: define ptr @gep_of_sub_ptrtoaddr_unrelated_pointers( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[P2_ADDR]], [[P_ADDR]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[SUB]] +; CHECK-NEXT: ret ptr [[GEP2]] +; + %p2.addr = ptrtoaddr ptr %p2 to i64 + %p.addr = ptrtoaddr ptr %p to i64 + %sub = sub i64 %p2.addr, %p.addr + %gep2 = getelementptr i8, ptr %p, i64 %sub + ret ptr %gep2 +} + +define ptr @gep_of_sub_ptrtoaddr(ptr %p, i64 %x) { +; CHECK-LABEL: define ptr @gep_of_sub_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep1 = getelementptr i8, ptr %p, i64 %x + %gep1.addr = ptrtoaddr ptr %gep1 to i64 + %p.addr = ptrtoaddr ptr %p to i64 + %sub = sub i64 %gep1.addr, %p.addr + %gep2 = getelementptr i8, ptr %p, i64 %sub + ret ptr %gep2 +} + +define ptr addrspace(1) @gep_of_sub_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %x) { +; CHECK-LABEL: define ptr addrspace(1) 
@gep_of_sub_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[X]] +; CHECK-NEXT: ret ptr addrspace(1) [[GEP1]] +; + %gep1 = getelementptr i8, ptr addrspace(1) %p, i32 %x + %gep1.addr = ptrtoaddr ptr addrspace(1) %gep1 to i32 + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %sub = sub i32 %gep1.addr, %p.addr + %gep2 = getelementptr i8, ptr addrspace(1) %p, i32 %sub + ret ptr addrspace(1) %gep2 +} + +define ptr @gep_of_sub_ptrtoaddr_ashr(ptr %p, i64 %x) { +; CHECK-LABEL: define ptr @gep_of_sub_ptrtoaddr_ashr( +; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep1 = getelementptr i8, ptr %p, i64 %x + %gep1.addr = ptrtoaddr ptr %gep1 to i64 + %p.addr = ptrtoaddr ptr %p to i64 + %sub = sub i64 %gep1.addr, %p.addr + %ashr = ashr i64 %sub, 1 + %gep2 = getelementptr i16, ptr %p, i64 %ashr + ret ptr %gep2 +} + +define ptr addrspace(1) @gep_of_sub_ptrtoaddr_ashr_addrsize(ptr addrspace(1) %p, i32 %x) { +; CHECK-LABEL: define ptr addrspace(1) @gep_of_sub_ptrtoaddr_ashr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[X]] +; CHECK-NEXT: ret ptr addrspace(1) [[GEP1]] +; + %gep1 = getelementptr i8, ptr addrspace(1) %p, i32 %x + %gep1.addr = ptrtoaddr ptr addrspace(1) %gep1 to i32 + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %sub = sub i32 %gep1.addr, %p.addr + %sdiv = sdiv i32 %sub, 3 + %gep2 = getelementptr [3 x i8], ptr addrspace(1) %p, i32 %sdiv + ret ptr addrspace(1) %gep2 +} + +; Not folding this to inttoptr(123), as this may have different provenance from +; %p, and the use of ptrtoaddr implies that the provenance of %p may not be +; exposed, such that inttoptr cannot recover it. 
+define ptr @gep_gep_neg_ptrtoaddr(ptr %p) { +; CHECK-LABEL: define ptr @gep_gep_neg_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 123 +; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[P_ADDR_NEG:%.*]] = sub i64 0, [[P_ADDR]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[P_ADDR_NEG]] +; CHECK-NEXT: ret ptr [[GEP2]] +; + %gep1 = getelementptr inbounds i8, ptr %p, i64 123 + %p.addr = ptrtoaddr ptr %p to i64 + %p.addr.neg = sub i64 0, %p.addr + %gep2 = getelementptr i8, ptr %gep1, i64 %p.addr.neg + ret ptr %gep2 +} + +define ptr @gep_gep_inv_ptrtoaddr(ptr %p) { +; CHECK-LABEL: define ptr @gep_gep_inv_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 123 +; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[P_ADDR_INV:%.*]] = xor i64 [[P_ADDR]], -1 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[P_ADDR_INV]] +; CHECK-NEXT: ret ptr [[GEP2]] +; + %gep1 = getelementptr inbounds i8, ptr %p, i64 123 + %p.addr = ptrtoaddr ptr %p to i64 + %p.addr.inv = xor i64 %p.addr, -1 + %gep2 = getelementptr i8, ptr %gep1, i64 %p.addr.inv + ret ptr %gep2 +} From db478ba75d342c4926685a90206bfb630c39f19e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Oct 2025 09:39:18 +0200 Subject: [PATCH 05/99] [Hexagon] Add REQUIRES: asserts to test This test uses -debug-only, so needs an assertion-enabled build. 
--- llvm/test/CodeGen/Hexagon/insert-big.ll | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/CodeGen/Hexagon/insert-big.ll b/llvm/test/CodeGen/Hexagon/insert-big.ll index a298930b6da6d..8735a6679bf54 100644 --- a/llvm/test/CodeGen/Hexagon/insert-big.ll +++ b/llvm/test/CodeGen/Hexagon/insert-big.ll @@ -14,6 +14,8 @@ ; RUN: llc -O2 -mtriple=hexagon -insert-max-ifmap=4 -debug-only=hexinsert -stop-after hexinsert < %s 2>&1 | FileCheck %s ; RUN: llc -O2 -mtriple=hexagon -insert-max-ifmap=5 -debug-only=hexinsert -stop-after hexinsert < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + define i32 @f(i32 %0, i32 %1, i32 %2) { entry: switch i32 %0, label %common.ret1 [ From 34c6fa3071227166390849161180ab321c27035f Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 21 Oct 2025 08:40:41 +0100 Subject: [PATCH 06/99] [AArch64] Combine scalar_to_vec into DUP if the DUP already exists (#160499) If we already have a dup(x) as part of the DAG along with a scalar_to_vec(x), we can reuse the result of the dup for the scalar_to_vec(x). --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +++++++++- .../test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll | 9 ++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 662d84b7a60a8..a81de5c5adc34 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27602,6 +27602,15 @@ static SDValue performPTestFirstCombine(SDNode *N, static SDValue performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { + SDLoc DL(N); + + // If a DUP(Op0) already exists, reuse it for the scalar_to_vector. + if (DCI.isAfterLegalizeDAG()) { + if (SDNode *LN = DCI.DAG.getNodeIfExists(AArch64ISD::DUP, N->getVTList(), + N->getOperand(0))) + return SDValue(LN, 0); + } + // Let's do below transform.
// // t34: v4i32 = AArch64ISD::UADDLV t2 @@ -27638,7 +27647,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); // Let's generate new sequence with AArch64ISD::NVCAST. - SDLoc DL(N); SDValue EXTRACT_SUBVEC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, UADDLV, DAG.getConstant(0, DL, MVT::i64)); diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index b215c518dce12..0933e67ed278b 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -1371,11 +1371,10 @@ define noundef <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %sc ; CHECK-SD-NEXT: lsr x9, x0, #16 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 ; CHECK-SD-NEXT: dup v4.8h, w0 -; CHECK-SD-NEXT: dup v1.8h, w9 -; CHECK-SD-NEXT: fmov s3, w9 -; CHECK-SD-NEXT: sqneg v2.8h, v1.8h -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-SD-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI14_0] +; CHECK-SD-NEXT: dup v2.8h, w9 +; CHECK-SD-NEXT: sqneg v1.8h, v2.8h +; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-SD-NEXT: rev32 v2.8h, v0.8h ; CHECK-SD-NEXT: sqdmull v3.4s, v0.4h, v4.4h ; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v4.8h From 1bf7ed27c1929152d876f9965895fd87ec8ccee4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 21 Oct 2025 09:02:13 +0100 Subject: [PATCH 07/99] [CAS] OnDiskGraphDB - fix MSVC "not all control paths return a value" warnings. NFC. 
(#164369) --- llvm/include/llvm/CAS/OnDiskGraphDB.h | 1 + llvm/lib/CAS/OnDiskGraphDB.cpp | 3 +++ 2 files changed, 4 insertions(+) diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index 83017a6a54fee..5f0ee0e131c0f 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -380,6 +380,7 @@ class OnDiskGraphDB { case ObjectPresence::OnlyInUpstreamDB: return true; } + llvm_unreachable("Unknown ObjectPresence enum"); } /// When \p load is called for a node that doesn't exist, this function tries diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 72bb98c4bf65d..64cbe9dc8e159 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -836,6 +836,7 @@ uint64_t DataRecordHandle::getDataSize() const { case DataSizeFlags::Uses8B: return support::endian::read64le(DataSizePtr); } + llvm_unreachable("Unknown DataSizeFlags enum"); } void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const { @@ -863,6 +864,7 @@ uint32_t DataRecordHandle::getNumRefs() const { case NumRefsFlags::Uses8B: return support::endian::read64le(NumRefsPtr); } + llvm_unreachable("Unknown NumRefsFlags enum"); } void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const { @@ -1270,6 +1272,7 @@ Expected OnDiskGraphDB::isMaterialized(ObjectID Ref) { return FaultInResult.takeError(); return true; } + llvm_unreachable("Unknown ObjectPresence enum"); } Expected From 253e43590842bffcc6950cc517a7f89cafe5ec69 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 21 Oct 2025 10:20:06 +0200 Subject: [PATCH 08/99] Reapply "[libc++] Optimize __hash_table::erase(iterator, iterator)" (#162850) This reapplication fixes the use after free caused by not properly updating the bucket list in one case. 
Original commit message: Instead of just calling the single element `erase` on every element of the range, we can combine some of the operations in a custom implementation. Specifically, we don't need to search for the previous node or re-link the list every iteration. Removing this unnecessary work results in some nice performance improvements: ``` ----------------------------------------------------------------------------------------------------------------------- Benchmark old new ----------------------------------------------------------------------------------------------------------------------- std::unordered_set::erase(iterator, iterator) (erase half the container)/0 457 ns 459 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/32 995 ns 626 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/1024 18196 ns 7995 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/8192 124722 ns 70125 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/0 456 ns 461 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/32 1183 ns 769 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/1024 27827 ns 18614 ns std::unordered_set::erase(iterator, iterator) (erase half the container)/8192 266681 ns 226107 ns std::unordered_map::erase(iterator, iterator) (erase half the container)/0 455 ns 462 ns std::unordered_map::erase(iterator, iterator) (erase half the container)/32 996 ns 659 ns std::unordered_map::erase(iterator, iterator) (erase half the container)/1024 15963 ns 8108 ns std::unordered_map::erase(iterator, iterator) (erase half the container)/8192 136493 ns 71848 ns std::unordered_multiset::erase(iterator, iterator) (erase half the container)/0 454 ns 455 ns std::unordered_multiset::erase(iterator, iterator) (erase half the container)/32 985 ns 703 ns std::unordered_multiset::erase(iterator, iterator) (erase half the container)/1024 
16277 ns 9085 ns std::unordered_multiset::erase(iterator, iterator) (erase half the container)/8192 125736 ns 82710 ns std::unordered_multimap::erase(iterator, iterator) (erase half the container)/0 457 ns 454 ns std::unordered_multimap::erase(iterator, iterator) (erase half the container)/32 1091 ns 646 ns std::unordered_multimap::erase(iterator, iterator) (erase half the container)/1024 17784 ns 7664 ns std::unordered_multimap::erase(iterator, iterator) (erase half the container)/8192 127098 ns 72806 ns ``` This reverts commit acc3a6234a91369b818fdd6482ded0ac32d8ffa6. --- libcxx/docs/ReleaseNotes/22.rst | 1 + libcxx/include/__hash_table | 100 +++++++++++++----- .../erase_range.pass.cpp | 31 ++++++ .../unord/unord.multiset/erase_range.pass.cpp | 17 +++ 4 files changed, 124 insertions(+), 25 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst index ada8b413e1259..e95cbc0c0f5b5 100644 --- a/libcxx/docs/ReleaseNotes/22.rst +++ b/libcxx/docs/ReleaseNotes/22.rst @@ -62,6 +62,7 @@ Improvements and New Features has been improved by up to 3x - The performance of ``insert(iterator, iterator)`` of ``map``, ``set``, ``multimap`` and ``multiset`` has been improved by up to 2.5x +- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x - The performance of ``map::insert_or_assign`` has been improved by up to 2x - ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them in chunks into a buffer. 
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 6b65e738fef3b..5432abb4ab39d 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -1037,7 +1037,21 @@ private: } _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {} - _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__node_pointer __nd) _NOEXCEPT { + auto& __alloc = __node_alloc(); + __node_traits::destroy(__alloc, std::addressof(__nd->__get_value())); + std::__destroy_at(std::__to_address(__nd)); + __node_traits::deallocate(__alloc, __nd, 1); + } + + _LIBCPP_HIDE_FROM_ABI void __deallocate_node_list(__next_pointer __np) _NOEXCEPT { + while (__np != nullptr) { + __next_pointer __next = __np->__next_; + __deallocate_node(__np->__upcast()); + __np = __next; + } + } + _LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT; template ::value, int> = 0> @@ -1175,7 +1189,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() { static_assert(is_copy_constructible::value, "Hasher must be copy-constructible."); #endif - __deallocate_node(__first_node_.__next_); + __deallocate_node_list(__first_node_.__next_); } template @@ -1251,7 +1265,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other) // At this point we either have consumed the whole incoming hash table, or we don't have any more nodes to reuse in // the destination. Either continue with constructing new nodes, or deallocate the left over nodes. 
if (__own_iter->__next_) { - __deallocate_node(__own_iter->__next_); + __deallocate_node_list(__own_iter->__next_); __own_iter->__next_ = nullptr; } else { __copy_construct(__other_iter, __own_iter, __current_chash); @@ -1262,19 +1276,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other) return *this; } -template -void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) _NOEXCEPT { - __node_allocator& __na = __node_alloc(); - while (__np != nullptr) { - __next_pointer __next = __np->__next_; - __node_pointer __real_np = __np->__upcast(); - __node_traits::destroy(__na, std::addressof(__real_np->__get_value())); - std::__destroy_at(std::addressof(*__real_np)); - __node_traits::deallocate(__na, __real_np, 1); - __np = __next; - } -} - template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer __hash_table<_Tp, _Hash, _Equal, _Alloc>::__detach() _NOEXCEPT { @@ -1318,7 +1319,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, max_load_factor() = __u.max_load_factor(); if (bucket_count() != 0) { __next_pointer __cache = __detach(); - auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); }); + auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); }); const_iterator __i = __u.begin(); while (__cache != nullptr && __u.size() != 0) { __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value())); @@ -1353,7 +1354,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __ if (bucket_count() != 0) { __next_pointer __cache = __detach(); - auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); }); + auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); }); for (; __cache != nullptr && __first != __last; ++__first) { __assign_value(__cache->__upcast()->__get_value(), *__first); __next_pointer __next = __cache->__next_; @@ -1374,7 
+1375,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f "__assign_multi may only be called with the containers value type or the nodes value type"); if (bucket_count() != 0) { __next_pointer __cache = __detach(); - auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); }); + auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); }); for (; __cache != nullptr && __first != __last; ++__first) { __assign_value(__cache->__upcast()->__get_value(), *__first); __next_pointer __next = __cache->__next_; @@ -1413,7 +1414,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT { template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT { if (size() > 0) { - __deallocate_node(__first_node_.__next_); + __deallocate_node_list(__first_node_.__next_); __first_node_.__next_ = nullptr; size_type __bc = bucket_count(); for (size_type __i = 0; __i < __bc; ++__i) @@ -1873,12 +1874,63 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { - for (const_iterator __p = __first; __first != __last; __p = __first) { - ++__first; - erase(__p); + if (__first == __last) + return iterator(__last.__node_); + + // current node + __next_pointer __current = __first.__node_; + size_type __bucket_count = bucket_count(); + size_t __chash = std::__constrain_hash(__current->__hash(), __bucket_count); + // find previous node + __next_pointer __before_first = __bucket_list_[__chash]; + for (; __before_first->__next_ != __current; __before_first = __before_first->__next_) + ; + + __next_pointer __last_node = __last.__node_; + + // If __before_first is in the same bucket (i.e.
the first element we erase is not the first in the bucket), clear + // this bucket first without re-linking it + if (__before_first != __first_node_.__ptr() && + std::__constrain_hash(__before_first->__hash(), __bucket_count) == __chash) { + while (__current != __last_node) { + auto __next = __current->__next_; + __deallocate_node(__current->__upcast()); + __current = __next; + --__size_; + + if (__next) { + if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) { + __bucket_list_[__next_chash] = __before_first; + __chash = __next_chash; + break; + } + } + } + } + + while (__current != __last_node) { + auto __next = __current->__next_; + __deallocate_node(__current->__upcast()); + __current = __next; + --__size_; + + // When switching buckets, set the old bucket to be empty and update the next bucket to have __before_first as its + // before-first element + if (__next) { + if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) { + __bucket_list_[__chash] = nullptr; + __bucket_list_[__next_chash] = __before_first; + __chash = __next_chash; + } + } else { // When __next is a nullptr the last bucket has been erased completely; clear its entry so it is not left stale + __bucket_list_[__chash] = nullptr; + } } - __next_pointer __np = __last.__node_; - return iterator(__np); + + // re-link __before_first with __last + __before_first->__next_ = __current; + + return iterator(__last.__node_); } template diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp index f8a2bdd3fee73..38b75c0c1986b 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp @@ -91,6 +91,37 @@ int main(int, char**) { assert(c.size() == 0); assert(k == c.end()); } + { // Make sure we're properly destroying the elements when erasing + { // When erasing part of a
bucket + std::unordered_multimap map; + map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak")); + map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak")); + map.erase(++map.begin(), map.end()); + } + { // When erasing the whole bucket + std::unordered_multimap map; + map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak")); + map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak")); + map.erase(map.begin(), map.end()); + } + } + { // Make sure that we're properly updating the bucket list when starting within a bucket + struct MyHash { + size_t operator()(size_t v) const { return v; } + }; + std::unordered_multimap map; + map.rehash(3); + size_t collision_val = 2 + map.bucket_count(); // try to get a bucket collision (uses the bucket count after rehash) + map.insert(std::pair(1, 1)); + map.insert(std::pair(collision_val, 1)); + map.insert(std::pair(2, 1)); + LIBCPP_ASSERT(map.bucket(2) == map.bucket(collision_val)); + + auto erase = map.equal_range(2); + map.erase(erase.first, erase.second); + for (const auto& v : map) + assert(v.first == 1 || v.first == collision_val); + } #if TEST_STD_VER >= 11 { typedef std::unordered_multimap map; + map.rehash(3); + size_t collision_val = 2 + map.bucket_count(); // try to get a bucket collision (uses the bucket count after rehash) + map.insert(1); + map.insert(collision_val); + map.insert(2); + LIBCPP_ASSERT(map.bucket(2) == map.bucket(collision_val)); + + auto erase = map.equal_range(2); + map.erase(erase.first, erase.second); + for (const auto& v : map) + assert(v == 1 || v == collision_val); + } #if TEST_STD_VER >= 11 { typedef std::unordered_multiset, std::equal_to, min_allocator> C; From 8da0df49568206544f64eea890e51354d04187cd Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Tue, 21 Oct 2025 09:29:18 +0100 Subject: [PATCH 09/99] [TableGen] List the indices of sub-operands (#163723) Some instances of the `Operand` class
used in Tablegen instruction definitions expand to a cluster of multiple operands at the MC layer, such as complex addressing modes involving base + offset + shift, or clusters of operands describing conditional Arm instructions or predicated MVE instructions. There's currently no convenient way for C++ code to know the offset of one of those sub-operands from the start of the cluster: instead it just hard-codes magic numbers like `index+2`, which is hard to read and fragile. This patch adds an extra piece of output to `InstrInfoEmitter` to define those instruction offsets, based on the name of the `Operand` class instance in Tablegen, and the names assigned to the sub-operands in the `MIOperandInfo` field. For example, if target Foo were to define def Bar : Operand { let MIOperandInfo = (ops GPR:$first, i32imm:$second); // ... } then the new constants would be `Foo::SUBOP_Bar_first` and `Foo::SUBOP_Bar_second`, defined as 0 and 1 respectively. As an example, I've converted some magic numbers related to the MVE predication operand types (`vpred_n` and its superset `vpred_r`) to use the new named constants in place of the integer literals they previously used. This is more verbose, but also clearer, because it explains why the integer is chosen instead of what its value is. 
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 2 +- .../Target/ARM/MVETPAndVPTOptimisationsPass.cpp | 2 +- llvm/utils/TableGen/InstrInfoEmitter.cpp | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 96ee69cf3f4ce..406f4c1f21983 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -882,7 +882,7 @@ static bool producesFalseLanesZero(MachineInstr &MI, continue; // Skip the lr predicate reg int PIdx = llvm::findFirstVPTPredOperandIdx(MI); - if (PIdx != -1 && (int)MO.getOperandNo() == PIdx + 2) + if (PIdx != -1 && MO.getOperandNo() == PIdx + ARM::SUBOP_vpred_n_tp_reg) continue; // Check that this instruction will produce zeros in its false lanes: diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp index 5eeb4fe995485..413e8442419fd 100644 --- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp @@ -534,7 +534,7 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, Register LR = LoopPhi->getOperand(0).getReg(); for (MachineInstr *MI : MVEInstrs) { int Idx = findFirstVPTPredOperandIdx(*MI); - MI->getOperand(Idx + 2).setReg(LR); + MI->getOperand(Idx + ARM::SUBOP_vpred_n_tp_reg).setReg(LR); } } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 176e4b250b82a..d1b14fbbdcd3e 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/CodeGenHelpers.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TGTimer.h" @@ -1135,6 +1136,22 @@ void 
InstrInfoEmitter::run(raw_ostream &OS) { OS << "\n};\n} // end namespace llvm\n"; + { + NamespaceEmitter LlvmNS(OS, "llvm"); + NamespaceEmitter TargetNS(OS, Target.getInstNamespace()); + for (const Record *R : Records.getAllDerivedDefinitions("Operand")) { + if (R->isAnonymous()) + continue; + if (const DagInit *D = R->getValueAsDag("MIOperandInfo")) { + for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i) { + if (const StringInit *Name = D->getArgName(i)) + OS << "constexpr unsigned SUBOP_" << R->getName() << "_" + << Name->getValue() << " = " << i << ";\n"; + } + } + } + } + OS << "#endif // GET_INSTRINFO_HEADER\n\n"; OS << "#ifdef GET_INSTRINFO_HELPER_DECLS\n"; From f188c97cc193773071b0b61ebf214705fb84189e Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 21 Oct 2025 01:38:24 -0700 Subject: [PATCH 10/99] [lldb] Add bidirectional packetLog to gdbclientutils.py (#162176) While debugging the tests for #155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've extended the packetLog in MockGDBServerResponder to record traffic in both directions. 
Tests have been updated accordingly --- .../Python/lldbsuite/test/gdbclientutils.py | 66 ++++++++++++++++--- .../Python/lldbsuite/test/lldbgdbclient.py | 18 ++--- .../gdb_remote_client/TestContinue.py | 6 +- .../gdb_remote_client/TestGDBRemoteClient.py | 52 ++++++--------- .../gdb_remote_client/TestGDBRemoteLoad.py | 4 +- .../TestGDBRemotePlatformFile.py | 22 +++---- 6 files changed, 102 insertions(+), 66 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index 1a2860a327439..bd2fdc0a60cb4 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -5,8 +5,9 @@ import threading import socket import traceback +from enum import Enum from lldbsuite.support import seven -from typing import Optional, List, Tuple +from typing import Optional, List, Tuple, Union, Sequence def checksum(message): @@ -76,6 +77,35 @@ def hex_decode_bytes(hex_bytes): return out +class PacketDirection(Enum): + RECV = "recv" + SEND = "send" + + +class PacketLog: + def __init__(self): + self._packets: list[tuple[PacketDirection, str]] = [] + + def add_sent(self, packet: str): + self._packets.append((PacketDirection.SEND, packet)) + + def add_received(self, packet: str): + self._packets.append((PacketDirection.RECV, packet)) + + def get_sent(self): + return [ + pkt for direction, pkt in self._packets if direction == PacketDirection.SEND + ] + + def get_received(self): + return [ + pkt for direction, pkt in self._packets if direction == PacketDirection.RECV + ] + + def __iter__(self): + return iter(self._packets) + + class MockGDBServerResponder: """ A base class for handling client packets and issuing server responses for @@ -90,21 +120,33 @@ class MockGDBServerResponder: registerCount: int = 40 - class RESPONSE_DISCONNECT: - pass + class SpecialResponse(Enum): + RESPONSE_DISCONNECT = 0 + RESPONSE_NONE = 1 - class RESPONSE_NONE: - pass + 
RESPONSE_DISCONNECT = SpecialResponse.RESPONSE_DISCONNECT + RESPONSE_NONE = SpecialResponse.RESPONSE_NONE + Response = Union[str, SpecialResponse] def __init__(self): - self.packetLog: List[str] = [] + self.packetLog = PacketLog() - def respond(self, packet): + def respond(self, packet: str) -> Sequence[Response]: """ Return the unframed packet data that the server should issue in response to the given packet received from the client. """ - self.packetLog.append(packet) + self.packetLog.add_received(packet) + response = self._respond_impl(packet) + if not isinstance(response, list): + response = [response] + for part in response: + if isinstance(part, self.SpecialResponse): + continue + self.packetLog.add_sent(part) + return response + + def _respond_impl(self, packet) -> Union[Response, List[Response]]: if packet is MockGDBServer.PACKET_INTERRUPT: return self.interrupt() if packet == "c": @@ -664,17 +706,19 @@ def _handlePacket(self, packet): # adding validation code to make sure the client only sends ACKs # when it's supposed to. return - response = "" + response = [""] # We'll handle the ack stuff here since it's not something any of the # tests will be concerned about, and it'll get turned off quickly anyway. 
if self._shouldSendAck: self._socket.sendall(seven.bitcast_to_bytes("+")) if packet == "QStartNoAckMode": self._shouldSendAck = False - response = "OK" + response = ["OK"] elif self.responder is not None: # Delegate everything else to our responder response = self.responder.respond(packet) + # MockGDBServerResponder no longer returns non-lists but others like + # ReverseTestBase still do if not isinstance(response, list): response = [response] for part in response: @@ -682,6 +726,8 @@ def _handlePacket(self, packet): continue if part is MockGDBServerResponder.RESPONSE_DISCONNECT: raise self.TerminateConnectionException() + # Should have handled the non-str's above + assert isinstance(part, str) self._sendPacket(part) PACKET_ACK = object() diff --git a/lldb/packages/Python/lldbsuite/test/lldbgdbclient.py b/lldb/packages/Python/lldbsuite/test/lldbgdbclient.py index 599f7878e6edb..9b2a89e934132 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbgdbclient.py +++ b/lldb/packages/Python/lldbsuite/test/lldbgdbclient.py @@ -10,7 +10,7 @@ class GDBRemoteTestBase(TestBase): Base class for GDB client tests. This class will setup and start a mock GDB server for the test to use. - It also provides assertPacketLogContains, which simplifies the checking + It also provides assertPacketLogReceived, which simplifies the checking of packets sent by the client. """ @@ -60,30 +60,32 @@ def connect(self, target, plugin="gdb-remote"): self.assertTrue(process, PROCESS_IS_VALID) return process - def assertPacketLogContains(self, packets, log=None): + def assertPacketLogReceived(self, packets, log: PacketLog = None): """ - Assert that the mock server's packet log contains the given packets. + Assert that the mock server's packet log received the given packets. The packet log includes all packets sent by the client and received - by the server. This fuction makes it easy to verify that the client + by the server. 
This function makes it easy to verify that the client sent the expected packets to the server. The check does not require that the packets be consecutive, but does require that they are ordered in the log as they ordered in the arg. """ if log is None: - log = self.server.responder.packetLog + received = self.server.responder.packetLog.get_received() + else: + received = log.get_received() i = 0 j = 0 - while i < len(packets) and j < len(log): - if log[j] == packets[i]: + while i < len(packets) and j < len(received): + if received[j] == packets[i]: i += 1 j += 1 if i < len(packets): self.fail( "Did not receive: %s\nLast 10 packets:\n\t%s" - % (packets[i], "\n\t".join(log)) + % (packets[i], "\n\t".join(received)) ) diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestContinue.py b/lldb/test/API/functionalities/gdb_remote_client/TestContinue.py index 3af4ca859f86e..67f0783167a32 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestContinue.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestContinue.py @@ -41,7 +41,7 @@ def qfThreadInfo(self): lldbutil.expect_state_changes( self, self.dbg.GetListener(), process, [lldb.eStateExited] ) - self.assertPacketLogContains(["vCont;C13:401"]) + self.assertPacketLogReceived(["vCont;C13:401"]) def test_continue_no_vCont(self): class MyResponder(self.BaseResponder): @@ -61,7 +61,7 @@ def other(self, packet): lldbutil.expect_state_changes( self, self.dbg.GetListener(), process, [lldb.eStateExited] ) - self.assertPacketLogContains(["Hc401", "C13"]) + self.assertPacketLogReceived(["Hc401", "C13"]) def test_continue_multiprocess(self): class MyResponder(self.BaseResponder): @@ -74,4 +74,4 @@ class MyResponder(self.BaseResponder): lldbutil.expect_state_changes( self, self.dbg.GetListener(), process, [lldb.eStateExited] ) - self.assertPacketLogContains(["vCont;C13:p400.401"]) + self.assertPacketLogReceived(["vCont;C13:p400.401"]) diff --git 
a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py index 67c5d7d55846d..8cf04522153d9 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py @@ -36,7 +36,7 @@ def test_connect(self): """Test connecting to a remote gdb server""" target = self.createTarget("a.yaml") process = self.connect(target) - self.assertPacketLogContains(["qProcessInfo", "qfThreadInfo"]) + self.assertPacketLogReceived(["qProcessInfo", "qfThreadInfo"]) def test_attach_fail(self): error_msg = "mock-error-msg" @@ -142,15 +142,16 @@ def test_read_registers_using_g_packets(self): # But there certainly should be no p packets after the g packet. self.read_registers(process) - print(f"\nPACKET LOG:\n{self.server.responder.packetLog}\n") + received = self.server.responder.packetLog.get_received() + print(f"\nPACKET LOG:\n{received}\n") g_pos = 0 try: - g_pos = self.server.responder.packetLog.index("g") + g_pos = received.index("g") except err: self.fail("'g' packet not found after fetching registers") try: - second_g = self.server.responder.packetLog.index("g", g_pos) + second_g = received.index("g", g_pos + 1) self.fail("Found more than one 'g' packet") except: pass @@ -158,13 +159,7 @@ def test_read_registers_using_g_packets(self): # Make sure there aren't any `p` packets after the `g` packet: self.assertEqual( 0, - len( - [ - p - for p in self.server.responder.packetLog[g_pos:] - if p.startswith("p") - ] - ), + len([p for p in received[g_pos:] if p.startswith("p")]), ) def test_read_registers_using_p_packets(self): @@ -177,10 +172,9 @@ def test_read_registers_using_p_packets(self): process = self.connect(target) self.read_registers(process) - self.assertNotIn("g", self.server.responder.packetLog) - self.assertGreater( - len([p for p in self.server.responder.packetLog if p.startswith("p")]), 0 - ) + 
received = self.server.responder.packetLog.get_received() + self.assertNotIn("g", received) + self.assertGreater(len([p for p in received if p.startswith("p")]), 0) def test_write_registers_using_P_packets(self): """Test writing registers using 'P' packets (default behavior)""" @@ -189,12 +183,9 @@ def test_write_registers_using_P_packets(self): process = self.connect(target) self.write_registers(process) - self.assertEqual( - 0, len([p for p in self.server.responder.packetLog if p.startswith("G")]) - ) - self.assertGreater( - len([p for p in self.server.responder.packetLog if p.startswith("P")]), 0 - ) + received = self.server.responder.packetLog.get_received() + self.assertEqual(0, len([p for p in received if p.startswith("G")])) + self.assertGreater(len([p for p in received if p.startswith("P")]), 0) def test_write_registers_using_G_packets(self): """Test writing registers using 'G' packets""" @@ -209,12 +200,9 @@ def readRegister(self, register): process = self.connect(target) self.write_registers(process) - self.assertEqual( - 0, len([p for p in self.server.responder.packetLog if p.startswith("P")]) - ) - self.assertGreater( - len([p for p in self.server.responder.packetLog if p.startswith("G")]), 0 - ) + received = self.server.responder.packetLog.get_received() + self.assertEqual(0, len([p for p in received if p.startswith("P")])) + self.assertGreater(len([p for p in received if p.startswith("G")]), 0) def read_registers(self, process): self.for_each_gpr( @@ -291,7 +279,7 @@ def qLaunchSuccess(self): self.assertTrue(process, PROCESS_IS_VALID) self.assertEqual(process.GetProcessID(), 16) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "A%d,0,%s,8,1,61726731,8,2,61726732,8,3,61726733" % (len(exe_hex), exe_hex), @@ -352,7 +340,7 @@ def A(self, packet): self.assertTrue(process, PROCESS_IS_VALID) self.assertEqual(process.GetProcessID(), 16) - self.assertPacketLogContains( + self.assertPacketLogReceived( ["vRun;%s;61726731;61726732;61726733" % 
(exe_hex,)] ) @@ -424,7 +412,7 @@ def A(self, packet): self.assertTrue(process, PROCESS_IS_VALID) self.assertEqual(process.GetProcessID(), 16) - self.assertPacketLogContains( + self.assertPacketLogReceived( ["vRun;%s;61726731;61726732;61726733" % (exe_hex,)] ) @@ -468,7 +456,7 @@ def vRun(self, packet): lldb.SBError(), ) # error - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "QEnvironment:EQUALS=foo=bar", "QEnvironmentHexEncoded:4e45454453454e433d66726f6224", @@ -522,7 +510,7 @@ def QEnvironment(self, packet): lldb.SBError(), ) # error - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "QEnvironmentHexEncoded:455155414c533d666f6f3d626172", "QEnvironmentHexEncoded:4e45454453454e433d66726f6224", diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteLoad.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteLoad.py index f0a5429e6c1ce..d8214ae6b9a2d 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteLoad.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteLoad.py @@ -22,7 +22,7 @@ def test_ram_load(self): target = self.createTarget("a.yaml") process = self.connect(target) self.dbg.HandleCommand("target modules load -l -s0") - self.assertPacketLogContains(["M1000,4:c3c3c3c3", "M1004,2:3232"]) + self.assertPacketLogReceived(["M1000,4:c3c3c3c3", "M1004,2:3232"]) @skipIfXmlSupportMissing def test_flash_load(self): @@ -63,7 +63,7 @@ def other(self, packet): target = self.createTarget("a.yaml") process = self.connect(target) self.dbg.HandleCommand("target modules load -l -s0") - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFlashErase:1000,100", "vFlashWrite:1000:\xc3\xc3\xc3\xc3", diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemotePlatformFile.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemotePlatformFile.py index 69e04df81bc6e..a3def8165586a 100644 --- 
a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemotePlatformFile.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemotePlatformFile.py @@ -30,7 +30,7 @@ def vFile(self, packet): ) self.match("platform file write 16 -o 11 -d teststring", [r"Return = 10"]) self.match("platform file close 16", [r"file 16 closed."]) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:open:2f736f6d652f66696c652e747874,00000202,000001ed", "vFile:pread:10,d,b", @@ -66,7 +66,7 @@ def vFile(self, packet): error=True, ) self.match("platform file close 16", [enosys_regex], error=True) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:open:2f736f6d652f66696c652e747874,00000202,000001ed", "vFile:pread:10,d,b", @@ -88,7 +88,7 @@ def vFile(self, packet): "platform get-size /some/file.txt", [r"File size of /some/file\.txt \(remote\): 4096"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:size:2f736f6d652f66696c652e747874", ] @@ -113,7 +113,7 @@ def vFile(self, packet): "platform get-size /some/file.txt", [r"File size of /some/file\.txt \(remote\): 66051"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:size:2f736f6d652f66696c652e747874", "vFile:open:2f736f6d652f66696c652e747874,00000000,00000000", @@ -135,7 +135,7 @@ def vFile(self, packet): [r"File size of /other/file\.txt \(remote\): 66051"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:size:2f6f746865722f66696c652e747874", "vFile:open:2f6f746865722f66696c652e747874,00000000,00000000", @@ -161,7 +161,7 @@ def vFile(self, packet): "platform get-permissions /some/file.txt", [r"File permissions of /some/file\.txt \(remote\): 0o0644"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:mode:2f736f6d652f66696c652e747874", ] @@ -190,7 +190,7 @@ def vFile(self, packet): "platform get-permissions /some/file.txt", [r"File permissions of /some/file\.txt \(remote\): 0o0644"], ) 
- self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:mode:2f736f6d652f66696c652e747874", "vFile:open:2f736f6d652f66696c652e747874,00000000,00000000", @@ -214,7 +214,7 @@ def vFile(self, packet): "platform file-exists /some/file.txt", [r"File /some/file\.txt \(remote\) exists"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:exists:2f736f6d652f66696c652e747874", ] @@ -233,7 +233,7 @@ def vFile(self, packet): "platform file-exists /some/file.txt", [r"File /some/file\.txt \(remote\) does not exist"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:exists:2f736f6d652f66696c652e747874", ] @@ -256,7 +256,7 @@ def vFile(self, packet): "platform file-exists /some/file.txt", [r"File /some/file\.txt \(remote\) exists"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:exists:2f736f6d652f66696c652e747874", "vFile:open:2f736f6d652f66696c652e747874,00000000,00000000", @@ -279,7 +279,7 @@ def vFile(self, packet): "platform file-exists /some/file.txt", [r"File /some/file\.txt \(remote\) does not exist"], ) - self.assertPacketLogContains( + self.assertPacketLogReceived( [ "vFile:exists:2f736f6d652f66696c652e747874", "vFile:open:2f736f6d652f66696c652e747874,00000000,00000000", From 00092f9bdd1d5037a5f4c8f3059e31e32aee6e8d Mon Sep 17 00:00:00 2001 From: Keshav Vinayak Jha <31160700+keshavvinayak01@users.noreply.github.com> Date: Tue, 21 Oct 2025 14:09:57 +0530 Subject: [PATCH 11/99] [MLIR] [Vector] Added canonicalizer for folding from_elements + transpose (#161841) ## Description Adds a new canonicalizer that folds `vector.from_elements(vector.transpose))` => `vector.from_elements`. This canonicalization reorders the input elements for `vector.from_elements`, adjusts the output shape to match the effect of the transpose op and eliminating its need. ## Testing Added a 2D vector lit test that verifies the working of the rewrite. 
--------- Signed-off-by: Keshav Vinayak Jha --- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 70 +++++++++++++++++++++- mlir/test/Dialect/Vector/canonicalize.mlir | 56 +++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 45c54c7587c69..ad8255a95cb4e 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -6835,6 +6835,73 @@ class FoldTransposeShapeCast final : public OpRewritePattern { } }; +/// Folds transpose(from_elements(...)) into a new from_elements with permuted +/// operands matching the transposed shape. +/// +/// Example: +/// +/// %v = vector.from_elements %a00, %a01, %a02, %a10, %a11, %a12 : +/// vector<2x3xi32> %t = vector.transpose %v, [1, 0] : vector<2x3xi32> to +/// vector<3x2xi32> +/// +/// becomes -> +/// +/// %r = vector.from_elements %a00, %a10, %a01, %a11, %a02, %a12 : +/// vector<3x2xi32> +/// +class FoldTransposeFromElements final : public OpRewritePattern { +public: + using Base::Base; + LogicalResult matchAndRewrite(vector::TransposeOp transposeOp, + PatternRewriter &rewriter) const override { + auto fromElementsOp = + transposeOp.getVector().getDefiningOp(); + if (!fromElementsOp) + return failure(); + + VectorType srcTy = fromElementsOp.getDest().getType(); + VectorType dstTy = transposeOp.getType(); + + ArrayRef permutation = transposeOp.getPermutation(); + int64_t rank = srcTy.getRank(); + + // Build inverse permutation to map destination indices back to source. 
+ SmallVector inversePerm(rank, 0); + for (int64_t i = 0; i < rank; ++i) + inversePerm[permutation[i]] = i; + + ArrayRef srcShape = srcTy.getShape(); + ArrayRef dstShape = dstTy.getShape(); + SmallVector srcIdx(rank, 0); + SmallVector dstIdx(rank, 0); + SmallVector srcStrides = computeStrides(srcShape); + SmallVector dstStrides = computeStrides(dstShape); + + auto elementsOld = fromElementsOp.getElements(); + SmallVector elementsNew; + int64_t dstNumElements = dstTy.getNumElements(); + elementsNew.reserve(dstNumElements); + + // For each element in destination row-major order, pick the corresponding + // source element. + for (int64_t linearIdx = 0; linearIdx < dstNumElements; ++linearIdx) { + // Pick the destination element index. + dstIdx = delinearize(linearIdx, dstStrides); + // Map the destination element index to the source element index. + for (int64_t j = 0; j < rank; ++j) + srcIdx[j] = dstIdx[inversePerm[j]]; + // Linearize the source element index. + int64_t srcLin = linearize(srcIdx, srcStrides); + // Add the source element to the new elements. + elementsNew.push_back(elementsOld[srcLin]); + } + + rewriter.replaceOpWithNewOp(transposeOp, dstTy, + elementsNew); + return success(); + } +}; + /// Folds transpose(broadcast(x)) to broadcast(x) if the transpose is /// 'order preserving', where 'order preserving' means the flattened /// inputs and outputs of the transpose have identical (numerical) values. 
@@ -6935,7 +7002,8 @@ class FoldTransposeBroadcast : public OpRewritePattern { void vector::TransposeOp::getCanonicalizationPatterns( RewritePatternSet &results, MLIRContext *context) { results.add(context); + FoldTransposeSplat, FoldTransposeFromElements, + FoldTransposeBroadcast>(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 59774f92cac36..084f49fca212f 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -3530,6 +3530,62 @@ func.func @from_elements_index_to_i64_conversion() -> vector<3xi64> { // ----- +// +--------------------------------------------------------------------------- +// Tests for FoldTransposeFromElements +// +--------------------------------------------------------------------------- + +// CHECK-LABEL: transpose_from_elements_1d +// CHECK-SAME: %[[EL_0:.*]]: i32, %[[EL_1:.*]]: i32 +func.func @transpose_from_elements_1d(%el_0: i32, %el_1: i32) -> vector<2xi32> { + %v = vector.from_elements %el_0, %el_1 : vector<2xi32> + %t = vector.transpose %v, [0] : vector<2xi32> to vector<2xi32> + return %t : vector<2xi32> + // CHECK: %[[R:.*]] = vector.from_elements %[[EL_0]], %[[EL_1]] : vector<2xi32> + // CHECK-NOT: vector.transpose + // CHECK: return %[[R]] +} + +// CHECK-LABEL: transpose_from_elements_2d +// CHECK-SAME: %[[EL_0_0:.*]]: i32, %[[EL_0_1:.*]]: i32, %[[EL_0_2:.*]]: i32, %[[EL_1_0:.*]]: i32, %[[EL_1_1:.*]]: i32, %[[EL_1_2:.*]]: i32 +func.func @transpose_from_elements_2d( + %el_0_0: i32, %el_0_1: i32, %el_0_2: i32, + %el_1_0: i32, %el_1_1: i32, %el_1_2: i32 +) -> vector<3x2xi32> { + %v = vector.from_elements %el_0_0, %el_0_1, %el_0_2, %el_1_0, %el_1_1, %el_1_2 : vector<2x3xi32> + %t = vector.transpose %v, [1, 0] : vector<2x3xi32> to vector<3x2xi32> + return %t : vector<3x2xi32> + // CHECK: %[[R:.*]] = vector.from_elements %[[EL_0_0:.*]], 
%[[EL_1_0:.*]], %[[EL_0_1:.*]], %[[EL_1_1:.*]], %[[EL_0_2:.*]], %[[EL_1_2:.*]] : vector<3x2xi32> + // CHECK-NOT: vector.transpose + // CHECK: return %[[R]] +} + +// CHECK-LABEL: transpose_from_elements_3d +// CHECK-SAME: %[[EL_0_0_0:.*]]: i32, %[[EL_0_0_1:.*]]: i32, %[[EL_0_1_0:.*]]: i32, %[[EL_0_1_1:.*]]: i32, %[[EL_0_2_0:.*]]: i32, %[[EL_0_2_1:.*]]: i32, %[[EL_1_0_0:.*]]: i32, %[[EL_1_0_1:.*]]: i32, %[[EL_1_1_0:.*]]: i32, %[[EL_1_1_1:.*]]: i32, %[[EL_1_2_0:.*]]: i32, %[[EL_1_2_1:.*]]: i32 +func.func @transpose_from_elements_3d( + %el_0_0_0: i32, %el_0_0_1: i32, %el_0_1_0: i32, %el_0_1_1: i32, %el_0_2_0: i32, %el_0_2_1: i32, + %el_1_0_0: i32, %el_1_0_1: i32, %el_1_1_0: i32, %el_1_1_1: i32, %el_1_2_0: i32, %el_1_2_1: i32 +) -> vector<2x2x3xi32> { + %v = vector.from_elements + %el_0_0_0, %el_0_0_1, + %el_0_1_0, %el_0_1_1, + %el_0_2_0, %el_0_2_1, + %el_1_0_0, %el_1_0_1, + %el_1_1_0, %el_1_1_1, + %el_1_2_0, %el_1_2_1 + : vector<2x3x2xi32> + %t = vector.transpose %v, [0, 2, 1] : vector<2x3x2xi32> to vector<2x2x3xi32> + return %t : vector<2x2x3xi32> + // CHECK: %[[R:.*]] = vector.from_elements %[[EL_0_0_0:.*]], %[[EL_0_1_0:.*]], %[[EL_0_2_0:.*]], %[[EL_0_0_1:.*]], %[[EL_0_1_1:.*]], %[[EL_0_2_1:.*]], %[[EL_1_0_0:.*]], %[[EL_1_1_0:.*]], %[[EL_1_2_0:.*]], %[[EL_1_0_1:.*]], %[[EL_1_1_1:.*]], %[[EL_1_2_1:.*]] : vector<2x2x3xi32> + // CHECK-NOT: vector.transpose + // CHECK: return %[[R]] +} + +// +--------------------------------------------------------------------------- +// End of Tests for FoldTransposeFromElements +// +--------------------------------------------------------------------------- + +// ----- + // Not a DenseElementsAttr, don't fold. 
// CHECK-LABEL: func @negative_insert_llvm_undef( From ab789beffdf68c65722203250d3094c15466459f Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Tue, 21 Oct 2025 18:11:09 +0900 Subject: [PATCH 12/99] [DA] Add initial support for monotonicity check (#162280) The dependence testing functions in DA assume that the analyzed AddRec does not wrap over the entire iteration space. For AddRecs that may wrap, DA should conservatively return unknown dependence. However, no validation is currently performed to ensure that this condition holds, which can lead to incorrect results in some cases. This patch introduces the notion of *monotonicity* and a validation logic to check whether a SCEV is monotonic. The monotonicity check classifies the SCEV into one of the following categories: - Unknown: Nothing is known about the monotonicity of the SCEV. - Invariant: The SCEV is loop-invariant. - MultivariateSignedMonotonic: The SCEV doesn't wrap in a signed sense for any iteration of the loops in the loop nest. The current validation logic basically searches an affine AddRec recursively and checks whether the `nsw` flag is present. Notably, it is still unclear whether we should also have a category for unsigned monotonicity. The monotonicity check is still under development and disabled by default for now. Since such a check is necessary to make DA sound, it should be enabled by default once the functionality is sufficient. Split off from #154527. 
--- llvm/lib/Analysis/DependenceAnalysis.cpp | 294 +++++++++- .../DependenceAnalysis/monotonicity-cast.ll | 207 +++++++ .../monotonicity-delinearize.ll | 59 ++ .../monotonicity-invariant.ll | 150 +++++ .../monotonicity-no-wrap-flags.ll | 519 ++++++++++++++++++ .../DependenceAnalysis/non-monotonic.ll | 77 +++ 6 files changed, 1303 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/DependenceAnalysis/monotonicity-cast.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/monotonicity-delinearize.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/monotonicity-invariant.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/non-monotonic.ll diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 805b6820e1e1c..0a8c2f82e162f 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -128,6 +128,18 @@ static cl::opt RunSIVRoutinesOnly( "The purpose is mainly to exclude the influence of those routines " "in regression tests for SIV routines.")); +// TODO: This flag is disabled by default because it is still under development. +// Enable it or delete this flag when the feature is ready. +static cl::opt EnableMonotonicityCheck( + "da-enable-monotonicity-check", cl::init(false), cl::Hidden, + cl::desc("Check if the subscripts are monotonic. If it's not, dependence " + "is reported as unknown.")); + +static cl::opt DumpMonotonicityReport( + "da-dump-monotonicity-report", cl::init(false), cl::Hidden, + cl::desc( + "When printing analysis, dump the results of monotonicity checks.")); + //===----------------------------------------------------------------------===// // basics @@ -177,13 +189,196 @@ void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredTransitive(); } +namespace { + +/// The property of monotonicity of a SCEV. 
To define the monotonicity, assume +/// a SCEV defined within N-nested loops. Let i_k denote the iteration number +/// of the k-th loop. Then we can regard the SCEV as an N-ary function: +/// +/// F(i_1, i_2, ..., i_N) +/// +/// The domain of i_k is the closed range [0, BTC_k], where BTC_k is the +/// backedge-taken count of the k-th loop. +/// +/// A function F is said to be "monotonically increasing with respect to the +/// k-th loop" if x <= y implies the following condition: +/// +/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) <= +/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N) +/// +/// where i_1, ..., i_{k-1}, i_{k+1}, ..., i_N, x, and y are elements of their +/// respective domains. +/// +/// Likewise F is "monotonically decreasing with respect to the k-th loop" +/// if x <= y implies +/// +/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) >= +/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N) +/// +/// A function F that is monotonically increasing or decreasing with respect to +/// the k-th loop is simply called "monotonic with respect to k-th loop". +/// +/// A function F is said to be "multivariate monotonic" when it is monotonic +/// with respect to all of the N loops. +/// +/// Since integer comparison can be either signed or unsigned, we need to +/// distinguish monotonicity in the signed sense from that in the unsigned +/// sense. Note that the inequality "x <= y" merely indicates loop progression +/// and is not affected by the difference between signed and unsigned order. +/// +/// Currently we only consider monotonicity in a signed sense. +enum class SCEVMonotonicityType { + /// We don't know anything about the monotonicity of the SCEV. + Unknown, + + /// The SCEV is loop-invariant with respect to the outermost loop. In other + /// words, the function F corresponding to the SCEV is a constant function. + Invariant, + + /// The function F corresponding to the SCEV is multivariate monotonic in a + /// signed sense. 
Note that the multivariate monotonic function may also be a + /// constant function. The order employed in the definition of monotonicity + /// is not strict order. + MultivariateSignedMonotonic, +}; + +struct SCEVMonotonicity { + SCEVMonotonicity(SCEVMonotonicityType Type, + const SCEV *FailurePoint = nullptr); + + SCEVMonotonicityType getType() const { return Type; } + + const SCEV *getFailurePoint() const { return FailurePoint; } + + bool isUnknown() const { return Type == SCEVMonotonicityType::Unknown; } + + void print(raw_ostream &OS, unsigned Depth) const; + +private: + SCEVMonotonicityType Type; + + /// The subexpression that caused Unknown. Mainly for debugging purpose. + const SCEV *FailurePoint; +}; + +/// Check the monotonicity of a SCEV. Since dependence tests (SIV, MIV, etc.) +/// assume that subscript expressions are (multivariate) monotonic, we need to +/// verify this property before applying those tests. Violating this assumption +/// may cause them to produce incorrect results. +struct SCEVMonotonicityChecker + : public SCEVVisitor { + + SCEVMonotonicityChecker(ScalarEvolution *SE) : SE(SE) {} + + /// Check the monotonicity of \p Expr. \p Expr must be integer type. If \p + /// OutermostLoop is not null, \p Expr must be defined in \p OutermostLoop or + /// one of its nested loops. + SCEVMonotonicity checkMonotonicity(const SCEV *Expr, + const Loop *OutermostLoop); + +private: + ScalarEvolution *SE; + + /// The outermost loop that DA is analyzing. + const Loop *OutermostLoop; + + /// A helper to classify \p Expr as either Invariant or Unknown. + SCEVMonotonicity invariantOrUnknown(const SCEV *Expr); + + /// Return true if \p Expr is loop-invariant with respect to the outermost + /// loop. + bool isLoopInvariant(const SCEV *Expr) const; + + /// A helper to create an Unknown SCEVMonotonicity. 
+ SCEVMonotonicity createUnknown(const SCEV *FailurePoint) { + return SCEVMonotonicity(SCEVMonotonicityType::Unknown, FailurePoint); + } + + SCEVMonotonicity visitAddRecExpr(const SCEVAddRecExpr *Expr); + + SCEVMonotonicity visitConstant(const SCEVConstant *) { + return SCEVMonotonicity(SCEVMonotonicityType::Invariant); + } + SCEVMonotonicity visitVScale(const SCEVVScale *) { + return SCEVMonotonicity(SCEVMonotonicityType::Invariant); + } + + // TODO: Handle more cases. + SCEVMonotonicity visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitAddExpr(const SCEVAddExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitMulExpr(const SCEVMulExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitTruncateExpr(const SCEVTruncateExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitUDivExpr(const SCEVUDivExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitSMaxExpr(const SCEVSMaxExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitUMaxExpr(const SCEVUMaxExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitSMinExpr(const SCEVSMinExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitUMinExpr(const SCEVUMinExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitUnknown(const SCEVUnknown *Expr) { + return invariantOrUnknown(Expr); + } + SCEVMonotonicity visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return invariantOrUnknown(Expr); + } + + friend struct SCEVVisitor; +}; + +} // anonymous 
namespace + // Used to test the dependence analyzer. // Looks through the function, noting instructions that may access memory. // Calls depends() on every possible pair and prints out the result. // Ignores all other instructions. static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, - ScalarEvolution &SE, bool NormalizeResults) { + ScalarEvolution &SE, LoopInfo &LI, + bool NormalizeResults) { auto *F = DA->getFunction(); + + if (DumpMonotonicityReport) { + SCEVMonotonicityChecker Checker(&SE); + OS << "Monotonicity check:\n"; + for (Instruction &Inst : instructions(F)) { + if (!isa(Inst) && !isa(Inst)) + continue; + Value *Ptr = getLoadStorePointerOperand(&Inst); + const Loop *L = LI.getLoopFor(Inst.getParent()); + const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L); + const SCEV *AccessFn = SE.removePointerBase(PtrSCEV); + SCEVMonotonicity Mon = Checker.checkMonotonicity(AccessFn, L); + OS.indent(2) << "Inst: " << Inst << "\n"; + OS.indent(4) << "Expr: " << *AccessFn << "\n"; + Mon.print(OS, 4); + } + OS << "\n"; + } + for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; ++SrcI) { if (SrcI->mayReadOrWriteMemory()) { @@ -235,7 +430,8 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, void DependenceAnalysisWrapperPass::print(raw_ostream &OS, const Module *) const { dumpExampleDependence( - OS, info.get(), getAnalysis().getSE(), false); + OS, info.get(), getAnalysis().getSE(), + getAnalysis().getLoopInfo(), false); } PreservedAnalyses @@ -244,7 +440,7 @@ DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { << "':\n"; dumpExampleDependence(OS, &FAM.getResult(F), FAM.getResult(F), - NormalizeResults); + FAM.getResult(F), NormalizeResults); return PreservedAnalyses::all(); } @@ -670,6 +866,81 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { return false; } +//===----------------------------------------------------------------------===// +// 
SCEVMonotonicity + +SCEVMonotonicity::SCEVMonotonicity(SCEVMonotonicityType Type, + const SCEV *FailurePoint) + : Type(Type), FailurePoint(FailurePoint) { + assert( + ((Type == SCEVMonotonicityType::Unknown) == (FailurePoint != nullptr)) && + "FailurePoint must be provided iff Type is Unknown"); +} + +void SCEVMonotonicity::print(raw_ostream &OS, unsigned Depth) const { + OS.indent(Depth) << "Monotonicity: "; + switch (Type) { + case SCEVMonotonicityType::Unknown: + assert(FailurePoint && "FailurePoint must be provided for Unknown"); + OS << "Unknown\n"; + OS.indent(Depth) << "Reason: " << *FailurePoint << "\n"; + break; + case SCEVMonotonicityType::Invariant: + OS << "Invariant\n"; + break; + case SCEVMonotonicityType::MultivariateSignedMonotonic: + OS << "MultivariateSignedMonotonic\n"; + break; + } +} + +bool SCEVMonotonicityChecker::isLoopInvariant(const SCEV *Expr) const { + return !OutermostLoop || SE->isLoopInvariant(Expr, OutermostLoop); +} + +SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) { + if (isLoopInvariant(Expr)) + return SCEVMonotonicity(SCEVMonotonicityType::Invariant); + return createUnknown(Expr); +} + +SCEVMonotonicity +SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr, + const Loop *OutermostLoop) { + assert(Expr->getType()->isIntegerTy() && "Expr must be integer type"); + this->OutermostLoop = OutermostLoop; + return visit(Expr); +} + +/// We only care about an affine AddRec at the moment. For an affine AddRec, +/// the monotonicity can be inferred from its nowrap property. For example, let +/// X and Y be loop-invariant, and assume Y is non-negative. An AddRec +/// {X,+,Y}<nsw> implies: +/// +/// X <=s (X + Y) <=s ((X + Y) + Y) <=s ... +/// +/// Thus, we can conclude that the AddRec is monotonically increasing with +/// respect to the associated loop in a signed sense. Similar reasoning +/// applies when Y is non-positive, leading to a monotonically decreasing +/// AddRec.
+SCEVMonotonicity +SCEVMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (!Expr->isAffine() || !Expr->hasNoSignedWrap()) + return createUnknown(Expr); + + const SCEV *Start = Expr->getStart(); + const SCEV *Step = Expr->getStepRecurrence(*SE); + + SCEVMonotonicity StartMon = visit(Start); + if (StartMon.isUnknown()) + return StartMon; + + if (!isLoopInvariant(Step)) + return createUnknown(Expr); + + return SCEVMonotonicity(SCEVMonotonicityType::MultivariateSignedMonotonic); +} + //===----------------------------------------------------------------------===// // DependenceInfo methods @@ -3488,10 +3759,19 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // resize Pair to contain as many pairs of subscripts as the delinearization // has found, and then initialize the pairs following the delinearization. Pair.resize(Size); + SCEVMonotonicityChecker MonChecker(SE); + const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr; for (int I = 0; I < Size; ++I) { Pair[I].Src = SrcSubscripts[I]; Pair[I].Dst = DstSubscripts[I]; unifySubscriptType(&Pair[I]); + + if (EnableMonotonicityCheck) { + if (MonChecker.checkMonotonicity(Pair[I].Src, OutermostLoop).isUnknown()) + return false; + if (MonChecker.checkMonotonicity(Pair[I].Dst, OutermostLoop).isUnknown()) + return false; + } } return true; @@ -3824,6 +4104,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, Pair[0].Src = SrcEv; Pair[0].Dst = DstEv; + SCEVMonotonicityChecker MonChecker(SE); + const Loop *OutermostLoop = SrcLoop ? 
SrcLoop->getOutermostLoop() : nullptr; + if (EnableMonotonicityCheck) + if (MonChecker.checkMonotonicity(Pair[0].Src, OutermostLoop).isUnknown() || + MonChecker.checkMonotonicity(Pair[0].Dst, OutermostLoop).isUnknown()) + return std::make_unique(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + if (Delinearize) { if (tryDelinearize(Src, Dst, Pair)) { LLVM_DEBUG(dbgs() << " delinearized\n"); diff --git a/llvm/test/Analysis/DependenceAnalysis/monotonicity-cast.ll b/llvm/test/Analysis/DependenceAnalysis/monotonicity-cast.ll new file mode 100644 index 0000000000000..e43d00d0bf651 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/monotonicity-cast.ll @@ -0,0 +1,207 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output -passes="print" -da-dump-monotonicity-report \ +; RUN: -da-enable-monotonicity-check 2>&1 | FileCheck %s + +; int8_t offset = start; +; for (int i = 0; i < 100; i++, offset += step) +; a[sext(offset)] = 0; +; +define void @sext_nsw(ptr %a, i8 %start, i8 %step) { +; CHECK-LABEL: 'sext_nsw' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {(sext i8 %start to i64),+,(sext i8 %step to i64)}<%loop> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - none! +; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %offset = phi i8 [ %start, %entry ], [ %offset.next, %loop ] + %offset.sext = sext i8 %offset to i64 + %idx = getelementptr i8, ptr %a, i64 %offset.sext + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %offset.next = add nsw i8 %offset, %step + %exitcond = icmp eq i64 %i.inc, 100 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; The addition for `%offset.next` can wrap, so we cannot prove monotonicity. 
+; +; int8_t offset = start; +; for (int i = 0; i < 100; i++, offset += step) +; a[sext(offset)] = 0; +; +define void @sext_may_wrap(ptr %a, i8 %start, i8 %step) { +; CHECK-LABEL: 'sext_may_wrap' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: (sext i8 {%start,+,%step}<%loop> to i64) +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: (sext i8 {%start,+,%step}<%loop> to i64) +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! +; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %offset = phi i8 [ %start, %entry ], [ %offset.next, %loop ] + %offset.sext = sext i8 %offset to i64 + %idx = getelementptr i8, ptr %a, i64 %offset.sext + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %offset.next = add i8 %offset, %step + %exitcond = icmp eq i64 %i.inc, 100 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; for (int8_t i = 0; i < 100; i++) +; a[zext(offset)] = 0; +; +define void @zext_pos(ptr %a) { +; CHECK-LABEL: 'zext_pos' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {0,+,1}<%loop> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - none! +; +entry: + br label %loop + +loop: + %i = phi i8 [ 0, %entry ], [ %i.inc, %loop ] + %offset.zext = zext nneg i8 %i to i64 + %idx = getelementptr i8, ptr %a, i64 %offset.zext + store i8 0, ptr %idx + %i.inc = add nsw i8 %i, 1 + %exitcond = icmp eq i8 %i.inc, 100 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; The zero-extended value of `offset` is no longer monotonic. In fact, the +; values of `offset` in each iteration are: +; +; iteration | 0 | 1 | 2 | ...
+; -------------|-----|---|---|--------- +; offset | -1 | 0 | 1 | ... +; zext(offset) | 255 | 0 | 1 | ... +; +; +; for (int8_t i = -1; i < 100; i++) +; a[zext(offset)] = 0; +; +define void @zext_cross_zero(ptr %a) { +; CHECK-LABEL: 'zext_cross_zero' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: (zext i8 {-1,+,1}<%loop> to i64) +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: (zext i8 {-1,+,1}<%loop> to i64) +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! +; +entry: + br label %loop + +loop: + %i = phi i8 [ -1, %entry ], [ %i.inc, %loop ] + %offset.zext = zext nneg i8 %i to i64 + %idx = getelementptr i8, ptr %a, i64 %offset.zext + store i8 0, ptr %idx + %i.inc = add nsw i8 %i, 1 + %exitcond = icmp eq i8 %i.inc, 100 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; In principle, we can prove that `zext(offset)` is monotonic since we know +; that `offset` is non-negative. +; +; int8_t offset = 0; +; for (int i = 0; i < 100; i++, offset += step) +; a[zext(offset)] = 0; +; +define void @zext_nneg_nsw(ptr %a, i8 %step) { +; CHECK-LABEL: 'zext_nneg_nsw' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: (zext i8 {0,+,%step}<%loop> to i64) +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: (zext i8 {0,+,%step}<%loop> to i64) +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! 
+; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %offset = phi i8 [ 0, %entry ], [ %offset.next, %loop ] + %offset.zext = zext nneg i8 %offset to i64 + %idx = getelementptr i8, ptr %a, i64 %offset.zext + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %offset.next = add nsw i8 %offset, %step + %exitcond = icmp eq i64 %i.inc, 100 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; SCEV handles `i & 1` as an i1 addrec. Ensure that the monotonicity analysis +; properly analyzes it. +; +; for (i = 0; i < 100; i++) +; a[i & 1] = 0; +; +define void @offset_truncated_to_i1(ptr %a) { +; CHECK-LABEL: 'offset_truncated_to_i1' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: (zext i1 {false,+,true}<%loop> to i64) +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: (zext i1 {false,+,true}<%loop> to i64) +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! 
+; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %and = and i64 %i, 1 + %idx = getelementptr inbounds i8, ptr %a, i64 %and + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.inc, 100 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/monotonicity-delinearize.ll b/llvm/test/Analysis/DependenceAnalysis/monotonicity-delinearize.ll new file mode 100644 index 0000000000000..71ea4e95059a0 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/monotonicity-delinearize.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output -passes="print" -da-dump-monotonicity-report \ +; RUN: -da-enable-monotonicity-check 2>&1 | FileCheck %s + +; The offset SCEV will be delinearized into a 2D array access, as follows: +; +; - Outer subscript: {0,+,1}<%loop.i.header> +; - Inner subscript: {0,+,1}<%loop.j.header> +; +; These subscripts are both monotonic, but we also need to check the +; monotonicity of the original addrec. +; +; char A[...][32]; +; for (i = 0; i < 1ll << 62; i++) +; for (j = 0; j < 32; j++) +; if (i < (1ll << 57)) +; A[i][j] = 0; +; +define void @linearized_offset_wrap(ptr %a) { +; CHECK-LABEL: 'linearized_offset_wrap' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %gep, align 1 +; CHECK-NEXT: Expr: {{\{\{}}0,+,32}<%loop.i.header>,+,1}<%loop.j.header> +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: {{\{\{}}0,+,32}<%loop.i.header>,+,1}<%loop.j.header> +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %gep, align 1 --> Dst: store i8 0, ptr %gep, align 1 +; CHECK-NEXT: da analyze - confused!
+; +entry: + br label %loop.i.header + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.header + +loop.j.header: + %j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j.latch ] + %cond = icmp slt i64 %i, 144115188075855872 ; 2^57 + br i1 %cond, label %if.then, label %loop.j.latch + +if.then: + %gep = getelementptr inbounds [32 x i8], ptr %a, i64 %i, i64 %j + store i8 0, ptr %gep + br label %loop.j.latch + +loop.j.latch: + %j.inc = add nuw nsw i64 %j, 1 + %ec.j = icmp eq i64 %j.inc, 32 + br i1 %ec.j, label %loop.i.latch, label %loop.j.header + +loop.i.latch: + %i.inc = add nuw nsw i64 %i, 1 + %ec.i = icmp eq i64 %i.inc, 4611686018427387904 ; 2^62 + br i1 %ec.i, label %exit, label %loop.i.header + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/monotonicity-invariant.ll b/llvm/test/Analysis/DependenceAnalysis/monotonicity-invariant.ll new file mode 100644 index 0000000000000..e5b6ddbaca6fe --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/monotonicity-invariant.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output -passes="print" -da-dump-monotonicity-report \ +; RUN: -da-enable-monotonicity-check 2>&1 | FileCheck %s + +; for (int i = 0; i < n; i++) +; a[x] = 0; +define void @single_loop_invariant(ptr %a, i64 %x, i64 %n) { +; CHECK-LABEL: 'single_loop_invariant' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: %x +; CHECK-NEXT: Monotonicity: Invariant +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - consistent output [S]! 
+; +entry: + %guard = icmp sgt i64 %n, 0 + br i1 %guard, label %loop, label %exit + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %x + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.inc, %n + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; for (int i = 0; i < n; i++) +; a[(i % 2 == 0 ? x : y)] = 0; +define void @single_loop_variant(ptr %a, i64 %x, i64 %y, i64 %n) { +; CHECK-LABEL: 'single_loop_variant' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: %offset +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: %offset +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! +; +entry: + %guard = icmp sgt i64 %n, 0 + br i1 %guard, label %loop, label %exit + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %offset = phi i64 [ %x, %entry ], [ %offset.next, %loop ] + %offset.next = phi i64 [ %y, %entry ], [ %offset, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.inc, %n + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; a[x + i] = 0; +define void @invariant_plus_monotonic0(ptr %a, i64 %x, i64 %n, i64 %m) { +; CHECK-LABEL: 'invariant_plus_monotonic0' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {%x,+,1}<%loop.i.header> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - consistent output [0 S]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + %offset = phi i64 [ %x, %entry ], [ %offset.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nuw nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %offset.inc = add nsw i64 %offset, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; a[x + j] = 0; +define void @invariant_plus_monotonic1(ptr %a, i64 %x, i64 %n, i64 %m) { +; CHECK-LABEL: 'invariant_plus_monotonic1' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {%x,+,1}<%loop.j> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - consistent output [S 0]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j ] + %offset = phi i64 [ %x, %loop.j.preheader ], [ %offset.inc, %loop.j ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nuw nsw i64 %j, 1 + %offset.inc = add nsw i64 %offset, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll b/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll new file mode 100644 index 0000000000000..7411dc9f5c053 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output -passes="print" -da-dump-monotonicity-report \ +; RUN: -da-enable-monotonicity-check 2>&1 | FileCheck %s + +; for (int i = 0; i < n; i++) +; a[i] = 0; +; +define void @single_loop_nsw(ptr %a, i64 %n) { +; CHECK-LABEL: 'single_loop_nsw' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {0,+,1}<%loop> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - none! 
+; +entry: + %guard = icmp sgt i64 %n, 0 + br i1 %guard, label %loop, label %exit + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %i + store i8 0, ptr %idx + %i.inc = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.inc, %n + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; The purpose of the variable `begin` is to avoid violating the size limitation +; of the allocated object in LLVM IR, which would cause UB. +; +; for (unsigned long long i = begin; i < end; i++) +; a[i] = 0; +; +define void @single_loop_nuw(ptr %a, i64 %begin, i64 %end) { +; CHECK-LABEL: 'single_loop_nuw' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {%begin,+,1}<%loop> +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: {%begin,+,1}<%loop> +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! +; +entry: + %guard = icmp ult i64 %begin, %end + br i1 %guard, label %loop, label %exit + +loop: + %i = phi i64 [ %begin, %entry ], [ %i.inc, %loop ] + %idx = getelementptr i8, ptr %a, i64 %i + store i8 0, ptr %idx + %i.inc = add nuw i64 %i, 1 + %exitcond = icmp eq i64 %i.inc, %end + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; a[i + j] = 0; +; +define void @nested_loop_nsw0(ptr %a, i64 %n, i64 %m) { +; CHECK-LABEL: 'nested_loop_nsw0' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}0,+,1}<%loop.i.header>,+,1}<%loop.j> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - output [* *]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j ] + %offset = add nsw i64 %i, %j + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; for (int i = n - 1; i >= 0; i--) +; for (int j = 0; j < m; j++) +; a[i + j] = 0; +; +define void @nested_loop_nsw1(ptr %a, i64 %n, i64 %m) { +; CHECK-LABEL: 'nested_loop_nsw1' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}(-1 + %n),+,-1}<%loop.i.header>,+,1}<%loop.j> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - output [* *]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ %n, %entry ], [ %i.dec, %loop.i.latch ] + %i.dec = add nsw i64 %i, -1 + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j ] + %offset = add nsw i64 %i.dec, %j + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %exitcond.i = icmp eq i64 %i.dec, 0 + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; a[i - j] = 0; +; +define void @nested_loop_nsw2(ptr %a, i64 %n, i64 %m) { +; CHECK-LABEL: 'nested_loop_nsw2' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}0,+,1}<%loop.i.header>,+,-1}<%loop.j> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - output [* *]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j ] + %offset = sub nsw i64 %i, %j + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; for (int i = begin0; i < end0; i++) +; for (int j = begin1; j < end1; j++) { +; unsigned long long offset = (unsigned long long)i + (unsigned long long)j; +; a[offset] = 0; +; } +; +define void @nested_loop_nuw(ptr %a, i64 %begin0, i64 %end0, i64 %begin1, i64 %end1) { +; CHECK-LABEL: 'nested_loop_nuw' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}(%begin0 + %begin1),+,1}<%loop.i.header>,+,1}<%loop.j> +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: {{\{\{}}(%begin0 + %begin1),+,1}<%loop.i.header>,+,1}<%loop.j> +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! 
+; +entry: + %guard.i.0 = icmp slt i64 0, %begin0 + %guard.i.1 = icmp slt i64 %begin0, %end0 + %guard.i.2 = icmp slt i64 0, %end0 + %and.i.0 = and i1 %guard.i.0, %guard.i.1 + %and.i.1 = and i1 %and.i.0, %guard.i.2 + br i1 %and.i.1, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ %begin0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %guard.j.0 = icmp slt i64 0, %begin1 + %guard.j.1 = icmp slt i64 %begin1, %end1 + %guard.j.2 = icmp slt i64 0, %end1 + %and.j.0 = and i1 %guard.j.0, %guard.j.1 + %and.j.1 = and i1 %and.j.0, %guard.j.2 + br i1 %and.j.1, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ %begin1, %loop.j.preheader ], [ %j.inc, %loop.j ] + %offset = add nuw i64 %i, %j + %idx = getelementptr i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, %end1 + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %end0 + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; a[i + step*j] = 0; +; +define void @nested_loop_step(ptr %a, i64 %n, i64 %m, i64 %step) { +; CHECK-LABEL: 'nested_loop_step' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}0,+,1}<%loop.i.header>,+,%step}<%loop.j> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - output [* *]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j, label %loop.i.latch + +loop.j: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j ] + %offset.j = phi i64 [ 0, %loop.j.preheader ], [ %offset.j.next, %loop.j ] + %offset = add nsw i64 %i, %offset.j + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nsw i64 %j, 1 + %offset.j.next = add nsw i64 %offset.j, %step + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; The value of step recurrence is not invariant with respect to the outermost +; loop (the i-loop). +; +; offset_i = 0; +; for (int i = 0; i < 100; i++) { +; for (int j = 0; j < 100; j++) +; a[offset_i + j] = 0; +; offset_i += (i % 2 == 0) ? 0 : 3; +; } +; +define void @step_is_variant(ptr %a) { +; CHECK-LABEL: 'step_is_variant' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {%offset.i,+,1}<%loop.j> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! 
+; +entry: + br label %loop.i.header + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + %offset.i = phi i64 [ 0, %entry ], [ %offset.i.next, %loop.i.latch ] + %step.i.0 = phi i64 [ 0, %entry ], [ %step.i.1, %loop.i.latch ] + %step.i.1 = phi i64 [ 3, %entry ], [ %step.i.0, %loop.i.latch ] + br label %loop.j + +loop.j: + %j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ] + %offset = add nsw i64 %offset.i, %j + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %j.inc = add nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, 100 + br i1 %exitcond.j, label %loop.i.latch, label %loop.j + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %offset.i.next = add nsw i64 %offset.i, %step.i.0 + %exitcond.i = icmp eq i64 %i.inc, 100 + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; The AddRec doesn't have nsw flag for the j-loop, since the store may not be +; executed. +; +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; if (cond) +; a[i + j] = 0; +; +define void @conditional_store0(ptr %a, i64 %n, i64 %m) { +; CHECK-LABEL: 'conditional_store0' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}0,+,1}<%loop.i.header>,+,1}<%loop.j.header> +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: {{\{\{}}0,+,1}<%loop.i.header>,+,1}<%loop.j.header> +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - confused! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j.header, label %loop.i.latch + +loop.j.header: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j.latch ] + %offset = add nsw i64 %i, %j + %cond = freeze i1 poison + br i1 %cond, label %if.then, label %loop.j.latch + +if.then: + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + br label %loop.j.latch + +loop.j.latch: + %j.inc = add nsw i64 %j, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j.header + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; Similar to the @conditional_store0, but the definition of the `%offset` is +; different from it and we can infer `nsw` in this case. +; +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; if (cond) +; a[i + j] = 0; +; +define void @conditional_store1(ptr %a, i64 %n, i64 %m) { +; CHECK-LABEL: 'conditional_store1' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: Expr: {{\{\{}}0,+,1}<%loop.i.header>,+,1}<%loop.j.header> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1 +; CHECK-NEXT: da analyze - output [* *]! 
+; +entry: + %guard.i = icmp sgt i64 %n, 0 + br i1 %guard.i, label %loop.i.header, label %exit + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + br label %loop.j.preheader + +loop.j.preheader: + %gurard.j = icmp sgt i64 %m, 0 + br i1 %gurard.j, label %loop.j.header, label %loop.i.latch + +loop.j.header: + %j = phi i64 [ 0, %loop.j.preheader ], [ %j.inc, %loop.j.latch ] + %offset = phi i64 [ %i, %loop.j.preheader ], [ %offset.next, %loop.j.latch ] + %cond = freeze i1 poison + br i1 %cond, label %if.then, label %loop.j.latch + +if.then: + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + br label %loop.j.latch + +loop.j.latch: + %j.inc = add nsw i64 %j, 1 + %offset.next = add nsw i64 %offset, 1 + %exitcond.j = icmp eq i64 %j.inc, %m + br i1 %exitcond.j, label %loop.i.latch, label %loop.j.header + +loop.i.latch: + %i.inc = add nsw i64 %i, 1 + %exitcond.i = icmp eq i64 %i.inc, %n + br i1 %exitcond.i, label %exit, label %loop.i.header + +exit: + ret void +} + +; In the following case, the computation `offset = offset_i + j` will not wrap, +; but `offset_i += 1024` will wrap both in a signed sense and an unsigned +; sense. We cannot prove the monotonicity in this case. +; +; offset_i = (1ULL << 63) - 256; +; for (i = 0; i < (1ULL << 62); i++, offset_i += 1024) +; for (j = 0; j < 32; j++) { +; offset = offset_i + j; +; +; // The value of `offset` is positive in a signed sense. 
+; if (offset < (1ULL << 63)) +; a[offset] = 0; +; } +; +define void @outer_loop_may_wrap(ptr %a) { +; CHECK-LABEL: 'outer_loop_may_wrap' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 0, ptr %gep, align 1 +; CHECK-NEXT: Expr: {{\{\{}}9223372036854775552,+,1024}<%loop.i.header>,+,1}<%loop.j.header> +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: {9223372036854775552,+,1024}<%loop.i.header> +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 0, ptr %gep, align 1 --> Dst: store i8 0, ptr %gep, align 1 +; CHECK-NEXT: da analyze - confused! +; +entry: + br label %loop.i.header + +loop.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + %subscript.i = phi i64 [ 9223372036854775552, %entry ], [ %subscript.i.next, %loop.i.latch ] ; The initial value is 2^63 - 256 + br label %loop.j.header + +loop.j.header: + %j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j.latch ] + %subscript = phi i64 [ %subscript.i, %loop.i.header ], [ %subscript.next, %loop.j.latch ] + %cond = icmp sge i64 %subscript, 0 + br i1 %cond, label %if.then, label %loop.j.latch + +if.then: + %gep = getelementptr inbounds i8, ptr %a, i64 %subscript + store i8 0, ptr %gep + br label %loop.j.latch + +loop.j.latch: + %j.inc = add nuw nsw i64 %j, 1 + %subscript.next = add nuw nsw i64 %subscript, 1 + %ec.j = icmp eq i64 %j.inc, 32 + br i1 %ec.j, label %loop.i.latch, label %loop.j.header + +loop.i.latch: + %i.inc = add nuw nsw i64 %i, 1 + %subscript.i.next = add i64 %subscript.i, 1024 + %ec.i = icmp eq i64 %i.inc, 4611686018427387904 ; 2^62 + br i1 %ec.i, label %exit, label %loop.i.header + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/non-monotonic.ll b/llvm/test/Analysis/DependenceAnalysis/non-monotonic.ll new file mode 100644 index 0000000000000..6247336456d2c --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/non-monotonic.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: 
--version 6 +; RUN: opt < %s -disable-output -passes="print<da>" -da-dump-monotonicity-report \ +; RUN: -da-enable-monotonicity-check 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output -passes="print<da>" 2>&1 | FileCheck %s -check-prefix=DISABLE-CHECK + +; +; for (i = 0; i < (1ULL << 60); i++) { +; A[i] = 1; +; +; unsigned long long offset = i * 32 + (1ULL << 62); +; // offset is positive when interpreted as a signed value. +; // To prevent violating the size limitation for an allocated object. +; if (offset < (1ULL << 63)) +; A[offset] = 2; +; } +; +; ----------------------------------------------------------------------------- +; +; There is a dependency between the two stores. To detect it, we need to check +; the monotonicity and bail out the analysis since `offset` is not monotonic. +; +; memory location | first store (A[i]) | second store (A[offset]) +; ------------------|--------------------|---------------------------- +; A[0] | i = 0 | i = 2^59 - 2^57 +; A[2^60 - 32] | i = 2^60 - 32 | i = 2^59 - 2^57 + 2^55 - 1 +; +define void @f(ptr %A) { +; CHECK-LABEL: 'f' +; CHECK-NEXT: Monotonicity check: +; CHECK-NEXT: Inst: store i8 1, ptr %idx.0, align 1 +; CHECK-NEXT: Expr: {0,+,1}<%loop.header> +; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic +; CHECK-NEXT: Inst: store i8 2, ptr %idx.1, align 1 +; CHECK-NEXT: Expr: {4611686018427387904,+,32}<%loop.header> +; CHECK-NEXT: Monotonicity: Unknown +; CHECK-NEXT: Reason: {4611686018427387904,+,32}<%loop.header> +; CHECK-EMPTY: +; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 +; CHECK-NEXT: da analyze - confused! 
+; +; DISABLE-CHECK-LABEL: 'f' +; DISABLE-CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1 +; DISABLE-CHECK-NEXT: da analyze - none! +; DISABLE-CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 +; DISABLE-CHECK-NEXT: da analyze - none! +; DISABLE-CHECK-NEXT: Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 +; DISABLE-CHECK-NEXT: da analyze - none! +; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + %idx.0 = getelementptr inbounds i8, ptr %A, i64 %i + store i8 1, ptr %idx.0 + %offset.tmp = mul i64 %i, 32 + %offset = add i64 %offset.tmp, 4611686018427387904 ; 1ULL << 62 + %if.cond = icmp sge i64 %offset, 0 + br i1 %if.cond, label %if.then, label %loop.latch + +if.then: + %idx.1 = getelementptr inbounds i8, ptr %A, i64 %offset + store i8 2, ptr %idx.1 + br label %loop.latch + +loop.latch: + %i.next = add nuw nsw i64 %i, 1 + %exit.cond = icmp eq i64 %i.next, 1152921504606846976 ; 1ULL << 60 + br i1 %exit.cond, label %exit, label %loop.header + +exit: + ret void +} From cc850b830cf1a6972d835825d54e1090c5b0217d Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 21 Oct 2025 10:25:07 +0100 Subject: [PATCH 13/99] [VPlan] Use VPlan::getRegion to shorten code (NFC) (#164287) --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e060e7081042a..51019eda9c3df 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2092,8 +2092,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo { // Recipes in replicate regions implicitly depend on predicate. If either // recipe is in a replicate region, only consider them equal if both have // the same parent. 
- const VPRegionBlock *RegionL = L->getParent()->getParent(); - const VPRegionBlock *RegionR = R->getParent()->getParent(); + const VPRegionBlock *RegionL = L->getRegion(); + const VPRegionBlock *RegionR = R->getRegion(); if (((RegionL && RegionL->isReplicator()) || (RegionR && RegionR->isReplicator())) && L->getParent() != R->getParent()) @@ -3867,8 +3867,7 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) { // required lanes implicitly. // TODO: Remove once replicate regions are unrolled completely. auto IsCandidateUnpackUser = [Def](VPUser *U) { - VPRegionBlock *ParentRegion = - cast<VPRecipeBase>(U)->getParent()->getParent(); + VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion(); return U->usesScalars(Def) && (!ParentRegion || !ParentRegion->isReplicator()); }; From 3fbae10faaba3de1c87d9af1c986147fb8fd5a42 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 21 Oct 2025 10:27:03 +0100 Subject: [PATCH 14/99] [VPlan] Improve code using m_APInt (NFC) (#161683) --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 51019eda9c3df..688a013eb353f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1478,11 +1478,8 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, if (!Plan.getVectorLoopRegion()) return false; - if (!Plan.getTripCount()->isLiveIn()) - return false; - auto *TC = dyn_cast_if_present<ConstantInt>( - Plan.getTripCount()->getUnderlyingValue()); - if (!TC || !BestVF.isFixed()) + const APInt *TC; + if (!BestVF.isFixed() || !match(Plan.getTripCount(), m_APInt(TC))) return false; // Calculate the minimum power-of-2 bit width that can fit the known TC, VF @@ -1495,7 +1492,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, return std::max<unsigned>(PowerOf2Ceil(MaxVal.getActiveBits()), 8); 
}; unsigned NewBitWidth = - ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF); + ComputeBitWidth(*TC, BestVF.getKnownMinValue() * BestUF); LLVMContext &Ctx = Plan.getContext(); auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth); From 1360aecb010a9f3c29221c6ee7823a19bdc4dc7e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 21 Oct 2025 10:35:16 +0100 Subject: [PATCH 15/99] [SystemZ] Avoid trunc(add(X,X)) patterns (#164378) Replace with trunc(add(X,Y)) to avoid premature folding in upcoming patch #164227 --- llvm/test/CodeGen/SystemZ/int-conv-14.ll | 45 +++++++++++++----------- llvm/test/CodeGen/SystemZ/int-conv-15.ll | 45 +++++++++++++----------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/llvm/test/CodeGen/SystemZ/int-conv-14.ll b/llvm/test/CodeGen/SystemZ/int-conv-14.ll index 98dc88f289620..baab5ac7f4b5c 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-14.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-14.ll @@ -55,14 +55,15 @@ define i128 @f4(ptr %ptr) { } ; Truncation to i64. -define i64 @f5(i128 %a) { +define i64 @f5(i128 %a, i128 %b) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i64 ret i64 %res } @@ -134,15 +135,16 @@ define i128 @f10(ptr %ptr) { } ; Truncation to i32. 
-define i32 @f11(i128 %a) { +define i32 @f11(i128 %a, i128 %b) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i32 ret i32 %res } @@ -215,15 +217,16 @@ define i128 @f16(ptr %ptr) { } ; Truncation to i16. -define i16 @f17(i128 %a) { +define i16 @f17(i128 %a, i128 %b) { ; CHECK-LABEL: f17: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i16 ret i16 %res } @@ -296,15 +299,16 @@ define i128 @f22(ptr %ptr) { } ; Truncation to i8. -define i8 @f23(i128 %a) { +define i8 @f23(i128 %a, i128 %b) { ; CHECK-LABEL: f23: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i8 ret i8 %res } @@ -385,15 +389,16 @@ define i128 @f28(ptr %ptr) { } ; Truncation to i1. 
-define i1 @f29(i128 %a) { +define i1 @f29(i128 %a, i128 %b) { ; CHECK-LABEL: f29: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i1 ret i1 %res } diff --git a/llvm/test/CodeGen/SystemZ/int-conv-15.ll b/llvm/test/CodeGen/SystemZ/int-conv-15.ll index 0d8ee75b10b85..f2c9ee5fa1f57 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-15.ll @@ -55,14 +55,15 @@ define i128 @f4(ptr %ptr) { } ; Truncation to i64. -define i64 @f5(i128 %a) { +define i64 @f5(i128 %a, i128 %b) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i64 ret i64 %res } @@ -134,15 +135,16 @@ define i128 @f10(ptr %ptr) { } ; Truncation to i32. -define i32 @f11(i128 %a) { +define i32 @f11(i128 %a, i128 %b) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i32 ret i32 %res } @@ -215,15 +217,16 @@ define i128 @f16(ptr %ptr) { } ; Truncation to i16. 
-define i16 @f17(i128 %a) { +define i16 @f17(i128 %a, i128 %b) { ; CHECK-LABEL: f17: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i16 ret i16 %res } @@ -296,15 +299,16 @@ define i128 @f22(ptr %ptr) { } ; Truncation to i8. -define i8 @f23(i128 %a) { +define i8 @f23(i128 %a, i128 %b) { ; CHECK-LABEL: f23: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i8 ret i8 %res } @@ -383,15 +387,16 @@ define i128 @f28(ptr %ptr) { } ; Truncation to i1. -define i1 @f29(i128 %a) { +define i1 @f29(i128 %a, i128 %b) { ; CHECK-LABEL: f29: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i1 ret i1 %res } From efcda547945e1c079a016a2184fde5b3153e4621 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 21 Oct 2025 11:38:45 +0200 Subject: [PATCH 16/99] [clang][CodeGen] Emit `llvm.tbaa.errno` metadata during module creation Let Clang emit `llvm.tbaa.errno` metadata in order to let LLVM carry out optimizations around errno-writing libcalls to, as long as it is proved the involved memory location does not alias `errno`. 
Previous discussion: https://discourse.llvm.org/t/rfc-modelling-errno-memory-effects/82972. --- clang/lib/CodeGen/CodeGenModule.cpp | 12 + clang/test/C/C11/n1285_1.c | 52 +- clang/test/CodeGen/AArch64/ABI-align-packed.c | 37 +- clang/test/CodeGen/AArch64/fp8-init-list.c | 16 +- clang/test/CodeGen/AArch64/ls64-inline-asm.c | 32 +- .../LoongArch/inline-asm-operand-modifiers.c | 4 +- .../CodeGen/LoongArch/lasx/builtin-alias.c | 3942 ++++++++--------- .../lasx/builtin-approximate-alias.c | 22 +- .../LoongArch/lasx/builtin-approximate.c | 22 +- clang/test/CodeGen/LoongArch/lasx/builtin.c | 3934 ++++++++-------- .../LoongArch/lasx/inline-asm-gcc-regs.c | 8 +- .../lasx/inline-asm-operand-modifier.c | 2 +- .../LoongArch/lsx/inline-asm-gcc-regs.c | 8 +- .../lsx/inline-asm-operand-modifier.c | 2 +- .../PowerPC/builtins-dmf-vsx-vector-float.c | 411 +- .../PowerPC/builtins-ppc-build-pair-mma.c | 28 +- clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c | 128 +- .../CodeGen/PowerPC/builtins-ppc-pair-mma.c | 270 +- .../attr-rvv-vector-bits-bitcast-less-8.c | 14 +- .../RISCV/attr-rvv-vector-bits-bitcast.c | 54 +- .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c | 14 +- .../RISCV/attr-rvv-vector-bits-globals.c | 40 +- .../CodeGen/SystemZ/builtins-systemz-i128.c | 126 +- .../SystemZ/gnu-atomic-builtins-i128-16Al.c | 68 +- .../SystemZ/gnu-atomic-builtins-i128-8Al.c | 68 +- .../CodeGen/SystemZ/sync-builtins-i128-16Al.c | 76 +- clang/test/CodeGen/SystemZ/zvector2.c | 118 +- clang/test/CodeGen/allow-ubsan-check.c | 250 +- .../attr-arm-sve-vector-bits-bitcast.c | 66 +- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 14 +- .../attr-arm-sve-vector-bits-globals.c | 36 +- .../CodeGen/attr-counted-by-for-pointers.c | 212 +- clang/test/CodeGen/attr-counted-by-pr110385.c | 16 +- clang/test/CodeGen/attr-counted-by.c | 364 +- .../CodeGen/builtin-maximumnum-minimumnum.c | 94 +- clang/test/CodeGen/builtin-maxnum-minnum.c | 94 +- clang/test/CodeGen/errno-tbaa.c | 12 + clang/test/CodeGen/isfpclass.c | 10 +- 
.../math-libcalls-tbaa-indirect-args.c | 24 +- clang/test/CodeGen/math-libcalls-tbaa.c | 56 +- clang/test/CodeGen/pointer-arithmetic-align.c | 18 +- .../CodeGen/sanitize-metadata-ignorelist.c | 12 +- .../CodeGen/sanitize-metadata-nosanitize.c | 50 +- clang/test/CodeGen/tbaa-class.cpp | 12 +- clang/test/CodeGen/tbaa-pointers.c | 281 +- .../tbaa-struct-bitfield-endianness.cpp | 10 +- clang/test/CodeGen/tbaa-struct.cpp | 144 +- clang/test/CodeGen/tbaa.c | 97 +- clang/test/CodeGen/tbaa.cpp | 240 +- .../attr-likelihood-if-branch-weights.cpp | 90 +- .../CodeGenCXX/builtin-get-vtable-pointer.cpp | 1521 ++++++- clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp | 100 +- .../CodeGenCXX/inline-then-fold-variadics.cpp | 14 +- .../CodeGenCXX/load-reference-metadata.cpp | 72 +- clang/test/CodeGenCXX/std-byte.cpp | 28 +- .../CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl | 16 +- .../test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 38 +- .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 176 +- clang/test/CodeGenOpenCL/amdgpu-printf.cl | 46 +- .../builtins-amdgcn-gfx12-wmma-w32.cl | 28 +- .../builtins-amdgcn-gfx12-wmma-w64.cl | 28 +- ...ins-amdgcn-gfx1250-async-load-store-lds.cl | 8 +- .../builtins-amdgcn-gws-insts.cl | 2 +- .../builtins-amdgcn-swmmac-w32.cl | 28 +- .../builtins-amdgcn-swmmac-w64.cl | 28 +- .../CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl | 22 +- .../CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl | 22 +- clang/test/CodeGenOpenCL/preserve_vec3.cl | 64 +- .../array-type-infinite-loop.clcpp | 22 +- .../Generic/unsigned-promotion-debuginfo.c | 102 +- clang/test/Headers/__clang_hip_math.hip | 1672 +++---- clang/test/Headers/wasm.c | 50 +- clang/test/OpenMP/bug54082.c | 28 +- ...arallel_reduction_codegen_tbaa_PR46146.cpp | 292 +- 74 files changed, 8717 insertions(+), 7400 deletions(-) create mode 100644 clang/test/CodeGen/errno-tbaa.c diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c5eb14e329315..1085f45e0fc21 100644 --- 
a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -83,6 +83,7 @@ static llvm::cl::opt LimitedCoverage( llvm::cl::desc("Emit limited coverage mapping information (experimental)")); static const char AnnotationSection[] = "llvm.metadata"; +static constexpr auto ErrnoTBAAMDName = "llvm.errno.tbaa"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getContext().getCXXABIKind()) { @@ -1583,6 +1584,17 @@ void CodeGenModule::Release() { } } } + + // Emit `!llvm.errno.tbaa`, a module-level metadata that specifies the TBAA + // for an int access. This allows LLVM to reason about what memory can be + // accessed by certain library calls that only touch errno. + if (TBAA) { + TBAAAccessInfo TBAAInfo = getTBAAAccessInfo(Context.IntTy); + if (llvm::MDNode *IntegerNode = getTBAAAccessTagInfo(TBAAInfo)) { + auto *ErrnoTBAAMD = TheModule.getOrInsertNamedMetadata(ErrnoTBAAMDName); + ErrnoTBAAMD->addOperand(IntegerNode); + } + } } void CodeGenModule::EmitOpenCLMetadata() { diff --git a/clang/test/C/C11/n1285_1.c b/clang/test/C/C11/n1285_1.c index 345ec94a1eeef..a1422f2b6ff63 100644 --- a/clang/test/C/C11/n1285_1.c +++ b/clang/test/C/C11/n1285_1.c @@ -32,9 +32,9 @@ struct X f(void); // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr [[P]], align 8, !tbaa [[INTPTR_TBAA2:![0-9]+]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7:![0-9]+]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr [[P]], align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[P]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] // @@ -91,18 +91,18 @@ int func_return(void) { // C11-O2: [[COND_END]]: // C11-O2-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A1]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] // C11-O2-NEXT: call void @llvm.lifetime.start.p0(ptr [[Q]]) #[[ATTR5]] // C11-O2-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTCOMPOUNDLITERAL]], i8 0, i64 20, i1 false) // C11-O2-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[DOTCOMPOUNDLITERAL]], i32 0, i32 0 // C11-O2-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[DOTCOMPOUNDLITERAL]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [5 x i32], ptr [[A3]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] -// C11-O2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA7]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA2]] +// C11-O2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa 
[[INT_TBAA2]] // C11-O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP3]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[Q]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[ADD]] @@ -138,10 +138,10 @@ int ternary(void) { // C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9:![0-9]+]] // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA2]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] // @@ -175,10 +175,10 @@ int comma(void) { // C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9]] // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4, !tbaa [[INT_TBAA7]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA2]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] // @@ -217,10 +217,10 @@ int cast(void) { // C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9]] // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA6]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA2]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[S]]) #[[ATTR5]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] @@ -232,13 +232,13 @@ int assign(void) { return *p; } //. 
-// C11-O2: [[INTPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// C11-O2: [[META3]] = !{!"p1 int", [[META4:![0-9]+]], i64 0} -// C11-O2: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0} -// C11-O2: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// C11-O2: [[META6]] = !{!"Simple C/C++ TBAA"} -// C11-O2: [[INT_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// C11-O2: [[META8]] = !{!"int", [[META5]], i64 0} +// C11-O2: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// C11-O2: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// C11-O2: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// C11-O2: [[META5]] = !{!"Simple C/C++ TBAA"} +// C11-O2: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// C11-O2: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0} +// C11-O2: [[META8]] = !{!"any pointer", [[META4]], i64 0} // C11-O2: [[TBAA_STRUCT9]] = !{i64 0, i64 20, [[META10:![0-9]+]]} -// C11-O2: [[META10]] = !{[[META5]], [[META5]], i64 0} +// C11-O2: [[META10]] = !{[[META4]], [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/AArch64/ABI-align-packed.c b/clang/test/CodeGen/AArch64/ABI-align-packed.c index 09f9180cdb0a7..d88a3383fe402 100644 --- a/clang/test/CodeGen/AArch64/ABI-align-packed.c +++ b/clang/test/CodeGen/AArch64/ABI-align-packed.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - %s | FileCheck %s #include @@ -58,8 +59,8 @@ struct non_packed_struct gs_non_packed_struct; // CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(16) [[S_NON_PACKED_STRUCT_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_NON_PACKED_STRUCT_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: store <8 x i16> [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_non_packed_struct, align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: store <8 x i16> [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_non_packed_struct, align 16, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_non_packed_struct(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -113,8 +114,8 @@ struct packed_struct gs_packed_struct; // CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PACKED_STRUCT_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
entry: // CHECK-NEXT: [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_STRUCT_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_struct, align 1, !tbaa [[TBAA6]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i16> [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_struct, align 1, !tbaa [[TBAA8]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -168,8 +169,8 @@ struct packed_member gs_packed_member; // CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PACKED_MEMBER_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_MEMBER_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_member, align 1, !tbaa [[TBAA6]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i16> [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_member, align 1, !tbaa [[TBAA8]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -223,8 +224,8 @@ struct aligned_struct_8 gs_aligned_struct_8; // CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x 
i16>] alignstack(16) [[S_ALIGNED_STRUCT_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_STRUCT_8_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_struct_8, align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_struct_8, align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_aligned_struct_8(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -278,8 +279,8 @@ struct aligned_member_8 gs_aligned_member_8; // CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(16) [[S_ALIGNED_MEMBER_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_MEMBER_8_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_member_8, align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_member_8, align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_aligned_member_8(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -333,8 +334,8 @@ struct pragma_packed_struct_8 gs_pragma_packed_struct_8; // CHECK-SAME: (double [[D0:%.*]], 
double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PRAGMA_PACKED_STRUCT_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_8_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_8, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_8, align 8, !tbaa [[TBAA8]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -388,8 +389,8 @@ struct pragma_packed_struct_4 gs_pragma_packed_struct_4; // CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PRAGMA_PACKED_STRUCT_4_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_4_COERCE]], 0 -// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_4, align 4, !tbaa [[TBAA6]] +// CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_4, align 4, !tbaa [[TBAA8]] // CHECK-NEXT: ret 
void __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -437,9 +438,9 @@ void test_pragma_packed_struct_4() { variadic_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4); } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"double", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} +// CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"double", [[META4]], i64 0} +// CHECK: [[TBAA8]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/AArch64/fp8-init-list.c b/clang/test/CodeGen/AArch64/fp8-init-list.c index 7c0f6278b2090..f461977ea835a 100644 --- a/clang/test/CodeGen/AArch64/fp8-init-list.c +++ b/clang/test/CodeGen/AArch64/fp8-init-list.c @@ -34,26 +34,26 @@ struct S s; // CHECK-LABEL: define dso_local void @f( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA2:![0-9]+]] +// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA6:![0-9]+]] // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z1fu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA2:![0-9]+]] +// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA6:![0-9]+]] // CHECK-CXX-NEXT: ret void // void f(__mfp8 x) { s = (struct S){x}; } //. 
-// CHECK: [[__MFP8_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__mfp8", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__MFP8_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__mfp8", [[META4]], i64 0} //. -// CHECK-CXX: [[__MFP8_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-CXX: [[META3]] = !{!"__mfp8", [[META4:![0-9]+]], i64 0} -// CHECK-CXX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-CXX: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-CXX: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-CXX: [[__MFP8_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-CXX: [[META7]] = !{!"__mfp8", [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/AArch64/ls64-inline-asm.c b/clang/test/CodeGen/AArch64/ls64-inline-asm.c index 1d217eb8801e5..04e2207357817 100644 --- a/clang/test/CodeGen/AArch64/ls64-inline-asm.c +++ b/clang/test/CodeGen/AArch64/ls64-inline-asm.c @@ -6,7 +6,7 @@ struct foo { unsigned long long x[8]; }; // CHECK-LABEL: define dso_local void @load( // CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 64)) [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR]]) #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR]]) #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]] // CHECK-NEXT: store i512 [[TMP0]], ptr [[OUTPUT]], align 8 // CHECK-NEXT: ret void // @@ -19,7 +19,7 @@ void load(struct foo *output, void *addr) // CHECK-SAME: ptr noundef readonly captures(none) [[INPUT:%.*]], ptr 
noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i512, ptr [[INPUT]], align 8 -// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR]]) #[[ATTR1]], !srcloc [[META3:![0-9]+]] +// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR]]) #[[ATTR1]], !srcloc [[META7:![0-9]+]] // CHECK-NEXT: ret void // void store(const struct foo *input, void *addr) @@ -30,28 +30,28 @@ void store(const struct foo *input, void *addr) // CHECK-LABEL: define dso_local void @store2( // CHECK-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[IN]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[IN]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 16 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[TMP2]] to i64 // CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 64 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV8:%.*]] = sext i32 [[TMP3]] to i64 // CHECK-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 100 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV11:%.*]] = sext i32 [[TMP4]] to i64 // CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 144 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV14:%.*]] = sext i32 [[TMP5]] to i64 // CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 196 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV17:%.*]] = sext i32 [[TMP6]] to i64 // CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 256 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CONV20:%.*]] = sext i32 [[TMP7]] to i64 // CHECK-NEXT: [[S_SROA_10_0_INSERT_EXT:%.*]] = zext i64 [[CONV20]] to i512 // CHECK-NEXT: [[S_SROA_10_0_INSERT_SHIFT:%.*]] = shl nuw i512 [[S_SROA_10_0_INSERT_EXT]], 448 @@ -84,11 +84,11 @@ void store2(int *in, void *addr) __asm__ volatile ("st64b %0,[%1]" : : "r" (s), "r" (addr) : "memory" ); } //. 
-// CHECK: [[META2]] = !{i64 789} -// CHECK: [[META3]] = !{i64 1368} -// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} -// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} -// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META6]] = !{i64 789} +// CHECK: [[META7]] = !{i64 1368} // CHECK: [[META8]] = !{i64 5992} //. diff --git a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c index b36fe7a7b69bb..bbb9962dd5dd2 100644 --- a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c +++ b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c @@ -6,7 +6,7 @@ // CHECK-LABEL: @test_z_zero( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 0) #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 0) #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]] // CHECK-NEXT: ret void // void test_z_zero(int a) { @@ -16,7 +16,7 @@ void test_z_zero(int a) { // CHECK-LABEL: @test_z_nonzero( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 1) #[[ATTR1]], !srcloc !3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 1) #[[ATTR1]], !srcloc [[META7:![0-9]+]] // CHECK-NEXT: ret void // void test_z_nonzero(int a) { diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c index dd094e5493a60..03a746c966cdd 
100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c @@ -6,2808 +6,2808 @@ // CHECK-LABEL: define dso_local void @xvsll_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsll_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsll_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsll_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvslli_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvslli_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvslli_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvslli_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsra_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsra_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x 
i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsra_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsra_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrai_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrai_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrai_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrai_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrar_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrar_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrar_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } // CHECK-LABEL: define dso_local void 
@xvsrar_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrari_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } // CHECK-LABEL: 
define dso_local void @xvsrari_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrari_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrari_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrl_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrl_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrl_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrl_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrli_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrli_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrli_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrli_d( // CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlr_h( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x 
i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlri_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 
1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlri_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlri_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 
xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlri_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclri_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclri_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclri_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) -// 
CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclri_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitset_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 
x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitset_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitset_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitset_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitseti_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitseti_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitseti_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitseti_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrev_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrev_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrev_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrev_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { 
return __lasx_xvbitrev_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrevi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrevi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrevi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrevi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvadd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_w( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddi_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvaddi_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } // CHECK-LABEL: define 
dso_local void @xvaddi_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvaddi_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvsub_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 
xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 
x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubi_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsubi_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsubi_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsubi_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvmax_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x 
i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmaxi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } // 
CHECK-LABEL: define dso_local void @xvmax_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> 
[[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmaxi_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store 
<4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvmin_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] 
= load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmini_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load 
<16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> 
[[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvmin_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmini_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvseq_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvseq_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvseq_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvseq_d( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvseqi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvseqi_h( // 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvseqi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvseqi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvslt_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 
xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvslti_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvslt_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x 
i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvslti_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } // 
CHECK-LABEL: define dso_local void @xvslti_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvsle_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return 
__lasx_xvsle_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvslei_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsle_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x 
i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvslei_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> 
[[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 
xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 
x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvadda_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvadda_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvadda_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvadda_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } // CHECK-LABEL: define 
dso_local void @xvsadd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x 
i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
ret void // v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } // CHECK-LABEL: define dso_local void 
@xvavgr_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) 
-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret 
void // v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] 
= tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] 
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } // 
CHECK-LABEL: define dso_local void @xvabsd_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x 
i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmadd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmadd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmadd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void 
// v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <32 x 
i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvdiv_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = 
load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x 
i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return 
__lasx_xvhaddw_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_d_w( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] 
// CHECK-NEXT: ret void // v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvrepl128vei_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvrepl128vei_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvrepl128vei_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvrepl128vei_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvpickev_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickev_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickev_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickev_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } // CHECK-LABEL: define dso_local void 
@xvilvh_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) 
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 
xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 
x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvshuf_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] 
-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvand_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// 
CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvandi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvor_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], 
<32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvori_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvnor_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvnori_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvxor_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvxori_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitsel_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvbitseli_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvshuf4i_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvshuf4i_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvshuf4i_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } @@ -2815,7 +2815,7 @@ v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } @@ -2823,7 +2823,7 @@ v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } @@ -2831,7 +2831,7 @@ v16i16 xvreplgr2vr_h(int 
_1) { return __lasx_xvreplgr2vr_h(_1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } @@ -2840,1641 +2840,1641 @@ v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } // CHECK-LABEL: define dso_local void @xvclo_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } // CHECK-LABEL: define dso_local void @xvclo_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } // CHECK-LABEL: define dso_local void @xvclo_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } // CHECK-LABEL: define dso_local void @xvclo_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } // CHECK-LABEL: define dso_local void @xvclz_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } // CHECK-LABEL: define dso_local void @xvclz_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } // CHECK-LABEL: define dso_local void @xvclz_w( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } // CHECK-LABEL: define dso_local void @xvclz_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } // CHECK-LABEL: define dso_local void @xvfadd_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return 
__lasx_xvfadd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfsub_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmul_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmul_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfdiv_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfdiv_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcvt_h_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcvt_s_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmin_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x 
float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmin_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmina_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmina_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret 
void // v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmax_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmax_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] 
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmaxa_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmaxa_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = 
load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfclass_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } // CHECK-LABEL: define dso_local void @xvfclass_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x 
double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } // CHECK-LABEL: define dso_local void @xvfsqrt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } // CHECK-LABEL: define dso_local void @xvfsqrt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } // CHECK-LABEL: define dso_local void @xvfrecip_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } // CHECK-LABEL: define dso_local void @xvfrecip_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } // CHECK-LABEL: define dso_local void @xvfrint_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } // CHECK-LABEL: define dso_local void @xvfrint_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } // CHECK-LABEL: define dso_local void @xvflogb_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } // CHECK-LABEL: define dso_local void @xvflogb_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } // CHECK-LABEL: define dso_local void @xvfcvth_s_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 
x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } // CHECK-LABEL: define dso_local void @xvfcvth_d_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } // CHECK-LABEL: define dso_local void @xvfcvtl_s_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } // CHECK-LABEL: define dso_local void @xvfcvtl_d_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } // CHECK-LABEL: define dso_local void @xvftint_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftint_l_d( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftint_wu_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } // CHECK-LABEL: define dso_local void @xvftint_lu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_wu_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_lu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } // CHECK-LABEL: define dso_local void @xvffint_s_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } // CHECK-LABEL: define dso_local void @xvffint_d_l( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } // CHECK-LABEL: define dso_local 
void @xvffint_s_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } // CHECK-LABEL: define dso_local void @xvffint_d_lu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } // CHECK-LABEL: define dso_local void @xvreplve_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvreplve_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvreplve_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvreplve_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpermi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvandn_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvneg_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } // CHECK-LABEL: define dso_local void @xvneg_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } // CHECK-LABEL: define dso_local void @xvneg_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } // CHECK-LABEL: define dso_local void @xvneg_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } // CHECK-LABEL: define dso_local void @xvmuh_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load 
<32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_bu( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] 
= load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvsllwil_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x 
i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsran_b_h( 
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsran_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsran_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = 
load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrarn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrarn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrarn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } // CHECK-LABEL: 
define dso_local void @xvssrarn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrln_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrln_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrln_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret 
void // v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlrn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlrn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlrn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void 
@xvssrlrn_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfrstpi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// 
CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvfrstpi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvfrstp_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] 
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfrstp_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x 
i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf4i_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvbsrl_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } // CHECK-LABEL: define dso_local void @xvbsll_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } // CHECK-LABEL: define dso_local void @xvextrins_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextrins_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextrins_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextrins_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void 
@xvmskltz_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } // CHECK-LABEL: define dso_local void @xvmskltz_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } // CHECK-LABEL: define dso_local void @xvmskltz_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } // CHECK-LABEL: define dso_local void @xvmskltz_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } // CHECK-LABEL: define dso_local void @xvsigncov_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] 
= load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsigncov_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsigncov_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsigncov_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x 
i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmadd_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfmadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfmsub_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfmsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } // 
CHECK-LABEL: define dso_local void @xvfnmadd_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfnmadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfnmsub_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfnmsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvftintrne_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrne_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrp_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrp_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrm_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x 
i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrm_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftint_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvffint_s_l( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrz_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x 
double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrp_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrm_w_d( // 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrne_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x 
double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftinth_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintl_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } // CHECK-LABEL: define dso_local void @xvffinth_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } // CHECK-LABEL: define dso_local void @xvffintl_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } // CHECK-LABEL: define dso_local void @xvftintrzh_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrzl_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrph_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrpl_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrmh_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrml_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrneh_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrnel_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrne_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } // CHECK-LABEL: 
define dso_local void @xvfrintrne_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } // CHECK-LABEL: define dso_local void @xvfrintrz_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrz_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } // CHECK-LABEL: define dso_local void @xvfrintrp_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrp_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } // CHECK-LABEL: define dso_local void @xvfrintrm_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrm_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x 
double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } @@ -4482,14 +4482,14 @@ v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } // CHECK-LABEL: define dso_local void @xvst( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2]], i32 1) // CHECK-NEXT: ret void // @@ -4497,7 +4497,7 @@ void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvstelm_b( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void 
@llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2]], i32 1, i32 1) // CHECK-NEXT: ret void // @@ -4505,7 +4505,7 @@ void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } // CHECK-LABEL: define dso_local void @xvstelm_h( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2]], i32 2, i32 1) // CHECK-NEXT: ret void // @@ -4513,7 +4513,7 @@ void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } // CHECK-LABEL: define dso_local void @xvstelm_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2]], i32 4, i32 1) // CHECK-NEXT: ret void // @@ -4521,7 +4521,7 @@ void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } // CHECK-LABEL: define dso_local void @xvstelm_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2]], i32 8, i32 1) // CHECK-NEXT: ret void // @@ 
-4529,108 +4529,108 @@ void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } // CHECK-LABEL: define dso_local void @xvinsve0_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvinsve0_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvpickve_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvpickve_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvssrlrn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 
xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvorn_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } @@ -4638,22 +4638,22 @@ v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvldi() { return __lasx_xvldi(1); } // CHECK-LABEL: define dso_local void @xvldx( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1), !noalias [[META5:![0-9]+]] -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1), !noalias [[META7:![0-9]+]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } // CHECK-LABEL: define dso_local void @xvstx( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2]], i64 1) // CHECK-NEXT: ret void // @@ -4661,209 +4661,209 @@ void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextl_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } // CHECK-LABEL: define dso_local void @xvinsgr2vr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } // CHECK-LABEL: define dso_local void @xvinsgr2vr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } // CHECK-LABEL: define dso_local void @xvreplve0_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } // 
CHECK-LABEL: define dso_local void @xvreplve0_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } // CHECK-LABEL: define dso_local void @vext2xv_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } // CHECK-LABEL: define dso_local void @vext2xv_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } // CHECK-LABEL: define dso_local void @vext2xv_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } // CHECK-LABEL: define dso_local void @vext2xv_w_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } // CHECK-LABEL: define dso_local void @vext2xv_d_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } // CHECK-LABEL: define 
dso_local void @vext2xv_d_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } // CHECK-LABEL: define dso_local void @vext2xv_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_wu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = 
load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_du_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_du_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } // CHECK-LABEL: define dso_local void @xvpermi_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvpermi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) -// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvperm_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } @@ -4871,7 +4871,7 @@ v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } @@ -4879,7 +4879,7 @@ v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1]], i32 2) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } @@ -4887,7 +4887,7 @@ v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1]], i32 4) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } @@ -4895,14 +4895,14 @@ v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1]], i32 8) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } // CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -4910,7 +4910,7 @@ int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } // CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_wu( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -4918,7 +4918,7 @@ unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } // CHECK-LABEL: define dso_local i64 @xvpickve2gr_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // @@ -4926,7 
+4926,7 @@ long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } // CHECK-LABEL: define dso_local i64 @xvpickve2gr_du( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // @@ -4934,1626 +4934,1626 @@ unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1) // CHECK-LABEL: define dso_local void @xvaddwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 
32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x 
i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } // CHECK-LABEL: define dso_local 
void @xvsubwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> 
[[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 
xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] 
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void 
@xvaddwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> 
[[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load 
<8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_w(v8i32 _1, 
v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x 
i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_h_bu( // CHECK-SAME: 
ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// 
CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = 
load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, 
_2); } // CHECK-LABEL: define dso_local void @xvmaddwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x 
i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, 
ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 
xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_h_bu( // 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x 
i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 
x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvrotr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvrotr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvrotr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvrotr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmskgez_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } // CHECK-LABEL: define dso_local void @xvmsknz_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } // CHECK-LABEL: define dso_local void @xvexth_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } // CHECK-LABEL: define dso_local void @xvexth_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } // CHECK-LABEL: define dso_local void @xvexth_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } // CHECK-LABEL: define dso_local void @xvexth_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } // CHECK-LABEL: define dso_local void @xvexth_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } // CHECK-LABEL: define dso_local void @xvexth_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } // CHECK-LABEL: define dso_local void @xvexth_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } // CHECK-LABEL: define dso_local void @xvexth_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } // CHECK-LABEL: define dso_local void @xvrotri_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x 
i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvrotri_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvrotri_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> 
[[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvrotri_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvextl_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 
xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } // CHECK-LABEL: define dso_local void @xvsrlni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = 
tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x 
i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return 
__lasx_xvssrlrni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_wu_d( // 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// 
CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return 
__lasx_xvssrani_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_du_q( // CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_b( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6561,7 +6561,7 @@ int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6569,7 +6569,7 @@ int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_h( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load 
<16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6577,7 +6577,7 @@ int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_v( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6585,7 +6585,7 @@ int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6593,7 +6593,7 @@ int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_b( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6601,7 +6601,7 @@ int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } // CHECK-LABEL: define 
dso_local signext i32 @xbz_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6609,7 +6609,7 @@ int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_h( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6617,7 +6617,7 @@ int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_v( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6625,7 +6625,7 @@ int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] 
= load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6633,458 +6633,458 @@ int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } // CHECK-LABEL: define dso_local void @xvfcmp_caf_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_caf_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_ceq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_ceq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cle_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cle_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_clt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_clt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cne_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cne_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { 
return __lasx_xvfcmp_cne_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cor_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cor_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cueq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cueq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cule_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cule_s( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cult_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// 
CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cult_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cun_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cune_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cune_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cun_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 
x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_saf_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_saf_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_seq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_seq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sle_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sle_d(v4f64 _1, 
v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sle_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_slt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_slt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sne_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x 
double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sne_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sor_d( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sor_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: 
store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sueq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sueq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sule_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sule_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sult_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store 
<4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sult_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sun_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sune_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sune_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sun_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 
xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickve_d_f( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } // CHECK-LABEL: define dso_local void @xvpickve_w_f( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } @@ -7092,7 +7092,7 @@ v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } @@ -7100,7 +7100,7 @@ v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } @@ -7108,7 +7108,7 @@ v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } @@ -7116,15 +7116,15 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } //. -// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[META5]] = !{[[META6:![0-9]+]]} -// CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]], !"__lasx_xvldx: %agg.result"} -// CHECK: [[META7]] = distinct !{[[META7]], !"__lasx_xvldx"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} +// CHECK: [[META7]] = !{[[META8:![0-9]+]]} +// CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__lasx_xvldx: %agg.result"} +// CHECK: [[META9]] = distinct !{[[META9]], !"__lasx_xvldx"} //. 
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c index b194ea8f3182a..1cbcdcf402893 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c @@ -6,41 +6,41 @@ // CHECK-LABEL: define dso_local void @xvfrecipe_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); } // CHECK-LABEL: define dso_local void @xvfrecipe_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrte_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrte_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); } //. 
-// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c index 9d543dfabe3d2..5276a488d1d1a 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c @@ -7,41 +7,41 @@ typedef double v4f64 __attribute__((vector_size(32), aligned(32))); // CHECK-LABEL: define dso_local void @xvfrecipe_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); } // CHECK-LABEL: define dso_local void @xvfrecipe_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] 
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrte_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrte_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); } //. -// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c index 9b21c7ea3e8a5..700e845cd662a 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c @@ -28,2808 +28,2808 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); // CHECK-LABEL: define dso_local void @xvsll_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// 
CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsll_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsll_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsll_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvslli_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvslli_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvslli_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvslli_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsra_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsra_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsra_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsra_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrai_b( // CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrai_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrai_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrai_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrar_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrar_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrar_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrar_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void 
// v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrari_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrari_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrari_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 
32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrari_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrl_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrl_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrl_w( 
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrl_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrli_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrli_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrli_h(v16i16 _1) { return 
__builtin_lasx_xvsrli_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrli_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrli_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlri_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlri_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlri_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsrlri_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } // 
CHECK-LABEL: define dso_local void @xvbitclr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> 
[[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x 
i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitclri_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclri_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclri_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitclri_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitset_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitset_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load 
<16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitset_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitset_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitseti_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitseti_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitseti_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitseti_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrev_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrev_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrev_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, 
_2); } // CHECK-LABEL: define dso_local void @xvbitrev_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvbitrevi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 
xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrevi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrevi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitrevi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvadd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_h( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddi_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store 
<32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvaddi_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvaddi_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddi_wu(v8i32 _1) 
{ return __builtin_lasx_xvaddi_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvaddi_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvsub_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // 
v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubi_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsubi_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsubi_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsubi_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvmax_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = 
load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmaxi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvmax_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmax_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmaxi_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmaxi_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvmin_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmin_d(v4i64 _1, 
v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmini_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvmin_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_wu( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmin_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmini_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 
1); } // CHECK-LABEL: define dso_local void @xvmini_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvmini_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvseq_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvseq_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret 
void // v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvseq_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvseq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvseqi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvseqi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 
x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvseqi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvseqi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvslt_b( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvslti_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvslt_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvslt_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 
x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvslti_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslti_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> 
[[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvsle_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvslei_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsle_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsle_du( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvslei_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } // CHECK-LABEL: define dso_local void 
@xvslei_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvslei_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } // CHECK-LABEL: define dso_local void 
@xvsat_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsat_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } // CHECK-LABEL: define dso_local void @xvadda_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvadda_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvadda_h(v16i16 _1, 
v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvadda_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvadda_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, 
ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsadd_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_w( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavg_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return 
__builtin_lasx_xvavgr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvavgr_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 
x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_hu( // 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 
x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvssub_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvabsd_d(v4i64 
_1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call 
<16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvabsd_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmul_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmadd_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmadd_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmadd_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_w( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = 
load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvdiv_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_h( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvdiv_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } // 
CHECK-LABEL: define dso_local void @xvhaddw_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 
x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 
xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmod_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvrepl128vei_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvrepl128vei_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvrepl128vei_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrepl128vei_w(v8i32 _1) 
{ return __builtin_lasx_xvrepl128vei_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvrepl128vei_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvpickev_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickev_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickev_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickev_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] 
-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_w( // 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickod_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvh_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { 
return __builtin_lasx_xvilvl_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvilvl_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackev_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvpackod_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvshuf_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvand_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvandi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvor_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvori_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvnor_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvnori_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvxor_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvxori_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvbitsel_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvbitseli_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvshuf4i_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvshuf4i_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvshuf4i_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 
1); } @@ -2837,7 +2837,7 @@ v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } @@ -2845,7 +2845,7 @@ v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } @@ -2853,7 +2853,7 @@ v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } @@ -2862,1641 +2862,1641 @@ v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } // CHECK-LABEL: define dso_local void @xvpcnt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } // CHECK-LABEL: define dso_local void @xvclo_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } // CHECK-LABEL: define dso_local void @xvclo_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } // CHECK-LABEL: 
define dso_local void @xvclo_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } // CHECK-LABEL: define dso_local void @xvclo_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } // CHECK-LABEL: define dso_local void @xvclz_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } // CHECK-LABEL: define dso_local void @xvclz_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } // CHECK-LABEL: define dso_local void @xvclz_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } // CHECK-LABEL: define dso_local void @xvclz_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } // CHECK-LABEL: define dso_local void @xvfadd_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfsub_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmul_s( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmul_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x 
double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfdiv_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfdiv_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcvt_h_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcvt_s_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmin_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmin_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmina_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] 
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmina_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmax_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmax_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmax_d(v4f64 
_1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmaxa_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmaxa_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfclass_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } // CHECK-LABEL: define dso_local void @xvfclass_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } // CHECK-LABEL: define dso_local void @xvfsqrt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } // CHECK-LABEL: define dso_local void @xvfsqrt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } // 
CHECK-LABEL: define dso_local void @xvfrecip_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } // CHECK-LABEL: define dso_local void @xvfrecip_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } // CHECK-LABEL: define dso_local void @xvfrint_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } // CHECK-LABEL: define dso_local void @xvfrint_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } // CHECK-LABEL: define dso_local void @xvfrsqrt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } // CHECK-LABEL: define dso_local void @xvflogb_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> 
[[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } // CHECK-LABEL: define dso_local void @xvflogb_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } // CHECK-LABEL: define dso_local void @xvfcvth_s_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: ret void // v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } // CHECK-LABEL: define dso_local void @xvfcvth_d_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } // CHECK-LABEL: define dso_local void @xvfcvtl_s_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } // CHECK-LABEL: define dso_local void @xvfcvtl_d_s( // CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } // CHECK-LABEL: define dso_local void @xvftint_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftint_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftint_wu_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } // CHECK-LABEL: define dso_local void @xvftint_lu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_wu_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrz_lu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } 
// CHECK-LABEL: define dso_local void @xvffint_s_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } // CHECK-LABEL: define dso_local void @xvffint_d_l( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } // CHECK-LABEL: define dso_local void @xvffint_s_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } // CHECK-LABEL: define dso_local void @xvffint_d_lu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } // CHECK-LABEL: define dso_local void @xvreplve_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvreplve_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvreplve_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvreplve_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvpermi_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvandn_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } // CHECK-LABEL: define dso_local void @xvneg_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } // CHECK-LABEL: define dso_local void @xvneg_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } // CHECK-LABEL: define dso_local void @xvneg_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } // CHECK-LABEL: define dso_local void @xvneg_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } // CHECK-LABEL: define dso_local void @xvmuh_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmuh_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x 
i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvsllwil_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store 
<8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // 
v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsllwil_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } // CHECK-LABEL: define dso_local void @xvsran_b_h( // CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsran_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsran_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 
x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 
xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssran_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] 
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrarn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrarn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrarn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_b_h( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x 
i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrarn_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrln_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrln_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrln_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, 
ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } // CHECK-LABEL: define 
dso_local void @xvsrlrn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlrn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x 
i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsrlrn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, 
ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfrstpi_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvfrstpi_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvfrstp_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfrstp_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvshuf4i_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvbsrl_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], 
i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } // CHECK-LABEL: define dso_local void @xvbsll_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } // CHECK-LABEL: define dso_local void @xvextrins_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextrins_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextrins_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextrins_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvmskltz_b( // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } // CHECK-LABEL: define dso_local void @xvmskltz_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } // CHECK-LABEL: define dso_local void @xvmskltz_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } // CHECK-LABEL: define dso_local void @xvmskltz_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } // CHECK-LABEL: define dso_local void @xvsigncov_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsigncov_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsigncov_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsigncov_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfmadd_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfmadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfmsub_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = 
load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfmsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } // 
CHECK-LABEL: define dso_local void @xvfnmadd_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfnmadd_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfnmsub_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvfnmsub_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvftintrne_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrne_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrp_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrp_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftintrm_w_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrm_l_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } // CHECK-LABEL: define dso_local void @xvftint_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvffint_s_l( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrz_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrp_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_d(v4f64 _1, v4f64 
_2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrm_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftintrne_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvftinth_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintl_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } // CHECK-LABEL: define dso_local void @xvffinth_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } // CHECK-LABEL: define dso_local void @xvffintl_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvffintl_d_w(v8i32 _1) { 
return __builtin_lasx_xvffintl_d_w(_1); } // CHECK-LABEL: define dso_local void @xvftintrzh_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrzl_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrph_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrpl_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrmh_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrml_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrneh_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } // CHECK-LABEL: define dso_local void @xvftintrnel_l_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrne_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrne_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } // CHECK-LABEL: define dso_local void @xvfrintrz_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrz_s(v8f32 _1) { return 
__builtin_lasx_xvfrintrz_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrz_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } // CHECK-LABEL: define dso_local void @xvfrintrp_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrp_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } // CHECK-LABEL: define dso_local void @xvfrintrm_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } // CHECK-LABEL: define dso_local void @xvfrintrm_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = 
load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } @@ -4504,14 +4504,14 @@ v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } // CHECK-LABEL: define dso_local void @xvst( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2]], i32 1) // CHECK-NEXT: ret void // @@ -4519,7 +4519,7 @@ void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvstelm_b( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2]], i32 1, i32 1) // CHECK-NEXT: ret void // @@ -4527,7 +4527,7 @@ void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, // CHECK-LABEL: define dso_local void @xvstelm_h( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2]], i32 2, i32 1) // CHECK-NEXT: ret void // @@ -4535,7 +4535,7 @@ void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2 // CHECK-LABEL: define dso_local void @xvstelm_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2]], i32 4, i32 1) // CHECK-NEXT: ret void // @@ -4543,7 +4543,7 @@ void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, // CHECK-LABEL: define dso_local void @xvstelm_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2]], i32 8, i32 1) // CHECK-NEXT: ret void // @@ -4551,108 +4551,108 @@ void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, // CHECK-LABEL: define dso_local void @xvinsve0_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvinsve0_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvpickve_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvpickve_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvssrlrn_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrlrn_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { 
return __builtin_lasx_xvssrlrn_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvssrln_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvorn_v( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } @@ -4660,7 +4660,7 @@ v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvldi() { return __builtin_lasx_xvldi(1); } @@ -4668,14 +4668,14 @@ v4i64 xvldi() { return __builtin_lasx_xvldi(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvldx(void *_1) { return 
__builtin_lasx_xvldx(_1, 1); } // CHECK-LABEL: define dso_local void @xvstx( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2]], i64 1) // CHECK-NEXT: ret void // @@ -4683,209 +4683,209 @@ void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvextl_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } // CHECK-LABEL: define dso_local void @xvinsgr2vr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } // CHECK-LABEL: define dso_local void @xvinsgr2vr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } // CHECK-LABEL: define dso_local void @xvreplve0_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvreplve0_w(v8i32 _1) { return 
__builtin_lasx_xvreplve0_w(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } // CHECK-LABEL: define dso_local void @xvreplve0_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } // CHECK-LABEL: define dso_local void @vext2xv_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } // CHECK-LABEL: define dso_local void @vext2xv_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } // CHECK-LABEL: define dso_local void @vext2xv_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } // CHECK-LABEL: define dso_local void @vext2xv_w_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } // CHECK-LABEL: define dso_local void @vext2xv_d_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <4 x i64> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } // CHECK-LABEL: define dso_local void @vext2xv_d_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } // CHECK-LABEL: define dso_local void @vext2xv_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 vext2xv_hu_bu(v32i8 _1) { return 
__builtin_lasx_vext2xv_hu_bu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_wu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_du_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } // CHECK-LABEL: define dso_local void @vext2xv_du_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } // CHECK-LABEL: define dso_local void @xvpermi_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvpermi_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvperm_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } @@ -4893,7 +4893,7 @@ v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) 
local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } @@ -4901,7 +4901,7 @@ v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1]], i32 2) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } @@ -4909,7 +4909,7 @@ v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1]], i32 4) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } @@ -4917,14 +4917,14 @@ v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } // CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1]], i32 8) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } // CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -4932,7 +4932,7 @@ int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } // CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_wu( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -4940,7 +4940,7 @@ unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, // CHECK-LABEL: define dso_local i64 @xvpickve2gr_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // @@ -4948,7 +4948,7 @@ long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } // CHECK-LABEL: define dso_local i64 @xvpickve2gr_du( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // @@ -4956,1626 +4956,1626 @@ unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d // CHECK-LABEL: define dso_local void @xvaddwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] 
= load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret 
void // v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } // CHECK-LABEL: define 
dso_local void @xvsubwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], 
<4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = 
load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x 
i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 
_2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x 
i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_q_du( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = 
load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvsubwod_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return 
__builtin_lasx_xvsubwod_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x 
i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x 
i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] 
// CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 
x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvhaddw_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return 
__builtin_lasx_xvhaddw_qu_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvhsubw_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } // CHECK-LABEL: define dso_local void @xvmaddwev_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, 
_2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] 
-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> 
[[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_q_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_d_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_w_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_h_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { 
return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwev_h_bu_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_d_wu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_w_hu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvmaddwod_h_bu_b( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } // CHECK-LABEL: define dso_local void @xvrotr_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } // CHECK-LABEL: define dso_local void @xvrotr_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } // CHECK-LABEL: define dso_local void @xvrotr_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } // CHECK-LABEL: define dso_local void @xvrotr_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return 
__builtin_lasx_xvrotr_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvadd_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } // CHECK-LABEL: define dso_local void @xvsub_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwev_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvaddwod_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwev_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmulwod_q_du_d( // CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvmskgez_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } // CHECK-LABEL: define dso_local void 
@xvmsknz_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } // CHECK-LABEL: define dso_local void @xvexth_h_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } // CHECK-LABEL: define dso_local void @xvexth_w_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } // CHECK-LABEL: define dso_local void @xvexth_d_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } // CHECK-LABEL: define dso_local void @xvexth_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } // CHECK-LABEL: define dso_local void @xvexth_hu_bu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } // CHECK-LABEL: define dso_local void @xvexth_wu_hu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } // CHECK-LABEL: define dso_local void @xvexth_du_wu( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } // CHECK-LABEL: define dso_local void @xvexth_qu_du( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } // CHECK-LABEL: define 
dso_local void @xvrotri_b( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } // CHECK-LABEL: define dso_local void @xvrotri_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } // CHECK-LABEL: define dso_local void @xvrotri_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } // CHECK-LABEL: define dso_local void @xvrotri_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } // CHECK-LABEL: define dso_local void @xvextl_q_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } // CHECK-LABEL: define dso_local void @xvsrlni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local 
void @xvsrlni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> 
[[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrlrni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlni_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrlrni_d_q(v4i64 
_1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrlrni_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] 
{ // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } 
// CHECK-LABEL: define dso_local void @xvsrani_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrani_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_w_d( // CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvsrarni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store 
<4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 
x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrani_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_b_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_h_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_w_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_d_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: 
[[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_bu_h( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_hu_w( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_wu_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8u32 
xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } // CHECK-LABEL: define dso_local void @xvssrarni_du_q( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_b( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6583,7 +6583,7 @@ int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6591,7 +6591,7 @@ int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_h( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6599,7 +6599,7 @@ int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_v( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6607,7 +6607,7 @@ int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } // CHECK-LABEL: define dso_local signext i32 @xbnz_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6615,7 +6615,7 @@ int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_b( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6623,7 +6623,7 @@ int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_d( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6631,7 +6631,7 @@ int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_h( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6639,7 +6639,7 @@ int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_v( // 
CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6647,7 +6647,7 @@ int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } // CHECK-LABEL: define dso_local signext i32 @xbz_w( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -6655,458 +6655,458 @@ int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } // CHECK-LABEL: define dso_local void @xvfcmp_caf_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// 
CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_caf_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_ceq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x 
double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_ceq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cle_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cle_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_clt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_clt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cne_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cne_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cor_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cor_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cueq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cueq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cule_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cule_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 
xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cult_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cult_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cun_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cune_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { 
// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cune_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_cune_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_cun_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_saf_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_saf_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_seq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_seq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } // CHECK-LABEL: define dso_local void 
@xvfcmp_sle_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sle_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> 
[[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_slt_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_slt_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sne_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sne_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sor_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sor_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sueq_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sueq_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sule_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sule_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sult_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sult_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load 
<8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sun_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sune_d( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sune_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvfcmp_sun_s( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } // CHECK-LABEL: define dso_local void @xvpickve_d_f( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } // CHECK-LABEL: define dso_local void @xvpickve_w_f( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } @@ -7114,7 +7114,7 @@ v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } @@ -7122,7 +7122,7 @@ v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } @@ -7130,7 +7130,7 @@ v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } @@ -7138,12 +7138,12 @@ v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } //. 
-// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c index ed1a9660a06c9..0dc74ff63d089 100644 --- a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c +++ b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c @@ -4,7 +4,7 @@ typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); // CHECK-LABEL: @test_xr0( -// CHECK: tail call void asm sideeffect "", "{$xr0}"(<32 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK: tail call void asm sideeffect "", "{$xr0}"(<32 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]] // void test_xr0() { register v32i8 a asm ("$xr0"); @@ -12,7 +12,7 @@ void test_xr0() { } // CHECK-LABEL: @test_xr7( -// CHECK: tail call void asm sideeffect "", "{$xr7}"(<32 x i8> undef) #[[ATTR1]], !srcloc !3 +// CHECK: tail call void asm sideeffect "", "{$xr7}"(<32 x i8> undef) #[[ATTR1]], !srcloc [[META7:![0-9]+]] // void test_xr7() { register v32i8 a asm ("$xr7"); @@ -20,7 +20,7 @@ void test_xr7() { } // CHECK-LABEL: @test_xr15( -// CHECK: tail call void asm sideeffect "", "{$xr15}"(<32 x i8> undef) #[[ATTR1]], !srcloc !4 +// CHECK: tail call void asm sideeffect "", "{$xr15}"(<32 x i8> undef) #[[ATTR1]], !srcloc [[META8:![0-9]+]] // void test_xr15() { register v32i8 a asm ("$xr15"); @@ -28,7 +28,7 @@ void test_xr15() { } // CHECK-LABEL: @test_xr31( -// CHECK: tail call void asm sideeffect "", "{$xr31}"(<32 x i8> undef) #[[ATTR1]], !srcloc !5 +// CHECK: tail call void asm sideeffect "", "{$xr31}"(<32 x i8> undef) #[[ATTR1]], !srcloc [[META9:![0-9]+]] // void test_xr31() { register 
v32i8 a asm ("$xr31"); diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c index a5cc8798fd66b..cb5e6891885dc 100644 --- a/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c +++ b/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c @@ -6,7 +6,7 @@ typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); // CHECK-LABEL: define dso_local void @test_u // CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]] // CHECK-NEXT: ret void // void test_u() { diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c index b05b1c8c15fae..588a3a1249247 100644 --- a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c +++ b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c @@ -4,7 +4,7 @@ typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); // CHECK-LABEL: @test_vr0( -// CHECK: tail call void asm sideeffect "", "{$vr0}"(<16 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK: tail call void asm sideeffect "", "{$vr0}"(<16 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]] // void test_vr0() { register v16i8 a asm ("$vr0"); @@ -12,7 +12,7 @@ void test_vr0() { } // CHECK-LABEL: @test_vr7( -// CHECK: tail call void asm sideeffect "", "{$vr7}"(<16 x i8> undef) #[[ATTR1]], !srcloc !3 +// CHECK: tail call void asm sideeffect "", "{$vr7}"(<16 x i8> undef) #[[ATTR1]], !srcloc [[META7:![0-9]+]] // void test_vr7() { register v16i8 a asm ("$vr7"); @@ -20,7 +20,7 @@ void test_vr7() { } // CHECK-LABEL: @test_vr15( -// CHECK: tail call void asm sideeffect "", "{$vr15}"(<16 x i8> undef) 
#[[ATTR1]], !srcloc !4 +// CHECK: tail call void asm sideeffect "", "{$vr15}"(<16 x i8> undef) #[[ATTR1]], !srcloc [[META8:![0-9]+]] // void test_vr15() { register v16i8 a asm ("$vr15"); @@ -28,7 +28,7 @@ void test_vr15() { } // CHECK-LABEL: @test_vr31( -// CHECK: tail call void asm sideeffect "", "{$vr31}"(<16 x i8> undef) #[[ATTR1]], !srcloc !5 +// CHECK: tail call void asm sideeffect "", "{$vr31}"(<16 x i8> undef) #[[ATTR1]], !srcloc [[META9:![0-9]+]] // void test_vr31() { register v16i8 a asm ("$vr31"); diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c index 5e0fae984134e..f0fb6e31a1a02 100644 --- a/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c +++ b/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c @@ -6,7 +6,7 @@ typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); // CHECK-LABEL: define dso_local void @test_w // CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]] // CHECK-NEXT: ret void // void test_w() { diff --git a/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c b/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c index 8fc9a68a5a613..f59a964641119 100644 --- a/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c +++ b/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c @@ -1,17 +1,26 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // Update then manual applied to commonize the checks for AIX and LoP. 
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \ // RUN: -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \ -// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX -// CHECK-LABEL: void @test_dmxvbf16gerx2( +// CHECK-LABEL: define dso_local void @test_dmxvbf16gerx2( +// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvbf16gerx2( +// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa 
[[__DMR1024_TBAA8:![0-9]+]] +// AIX-NEXT: ret void +// void test_dmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -19,14 +28,24 @@ void test_dmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigne *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvbf16gerx2nn( +// CHECK-LABEL: define dso_local void @test_dmxvbf16gerx2nn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvbf16gerx2nn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// 
AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -34,14 +53,24 @@ void test_dmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsig *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvbf16gerx2np( +// CHECK-LABEL: define dso_local void @test_dmxvbf16gerx2np( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr 
[[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvbf16gerx2np( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -49,14 +78,24 @@ void test_dmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsig *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvbf16gerx2pn( +// CHECK-LABEL: define dso_local void @test_dmxvbf16gerx2pn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa 
[[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvbf16gerx2pn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -64,14 +103,24 @@ void test_dmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsig *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvbf16gerx2pp( +// CHECK-LABEL: define dso_local void @test_dmxvbf16gerx2pp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvbf16gerx2pp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -79,13 +128,22 @@ void test_dmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsig *((__dmr1024 *)resp) = 
vdmr; } -// CHECK-LABEL: void @test_pmdmxvbf16gerx2( +// CHECK-LABEL: define dso_local void @test_pmdmxvbf16gerx2( +// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvbf16gerx2( +// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -93,14 +151,24 @@ 
void test_pmdmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsig *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvbf16gerx2nn( +// CHECK-LABEL: define dso_local void @test_pmdmxvbf16gerx2nn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvbf16gerx2nn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = 
tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -108,14 +176,24 @@ void test_pmdmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector uns *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvbf16gerx2np( +// CHECK-LABEL: define dso_local void @test_pmdmxvbf16gerx2np( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvbf16gerx2np( +// AIX-SAME: ptr noundef 
readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -123,14 +201,24 @@ void test_pmdmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector uns *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvbf16gerx2pn( +// CHECK-LABEL: define dso_local void @test_pmdmxvbf16gerx2pn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa 
[[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvbf16gerx2pn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -138,14 +226,24 @@ void test_pmdmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector uns *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvbf16gerx2pp( +// CHECK-LABEL: define dso_local void @test_pmdmxvbf16gerx2pp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, 
ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvbf16gerx2pp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -153,13 +251,22 @@ void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, 
vector uns *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvf16gerx2( +// CHECK-LABEL: define dso_local void @test_dmxvf16gerx2( +// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvf16gerx2( +// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -167,14 +274,24 @@ void test_dmxvf16gerx2(unsigned char 
*vdmrp, unsigned char *vpp, vector unsigned *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvf16gerx2nn( +// CHECK-LABEL: define dso_local void @test_dmxvf16gerx2nn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvf16gerx2nn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 
x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -182,14 +299,24 @@ void test_dmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsign *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvf16gerx2np( +// CHECK-LABEL: define dso_local void @test_dmxvf16gerx2np( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvf16gerx2np( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) 
[[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -197,14 +324,24 @@ void test_dmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsign *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvf16gerx2pn( +// CHECK-LABEL: define dso_local void @test_dmxvf16gerx2pn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], 
<16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvf16gerx2pn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -212,14 +349,24 @@ void test_dmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsign *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_dmxvf16gerx2pp( +// CHECK-LABEL: define dso_local void @test_dmxvf16gerx2pp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store 
<1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvf16gerx2pp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_dmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -227,13 +374,22 @@ void test_dmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsign *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvf16gerx2( +// CHECK-LABEL: define dso_local void @test_pmdmxvf16gerx2( +// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvf16gerx2( +// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -241,14 +397,24 @@ void test_pmdmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsign *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvf16gerx2nn( +// CHECK-LABEL: define dso_local void @test_pmdmxvf16gerx2nn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef 
[[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvf16gerx2nn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char 
*resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -256,14 +422,24 @@ void test_pmdmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsi *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvf16gerx2np( +// CHECK-LABEL: define dso_local void @test_pmdmxvf16gerx2np( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvf16gerx2np( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: 
[[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -271,14 +447,24 @@ void test_pmdmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsi *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvf16gerx2pn( +// CHECK-LABEL: define dso_local void @test_pmdmxvf16gerx2pn( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa 
[[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvf16gerx2pn( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -286,14 +472,24 @@ void test_pmdmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsi *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: void @test_pmdmxvf16gerx2pp( +// CHECK-LABEL: define dso_local void @test_pmdmxvf16gerx2pp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], 
align 128, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvf16gerx2pp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: ret void +// void test_pmdmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -301,9 +497,18 @@ void test_pmdmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsi *((__dmr1024 *)resp) = vdmr; } -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +//. 
+// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"__dmr1024", [[META4]], i64 0} +// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// CHECK: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[META9]] = !{!"__dmr1024", [[META4]], i64 0} +//. +// AIX: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// AIX: [[META5]] = !{!"Simple C/C++ TBAA"} +// AIX: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// AIX: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// AIX: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// AIX: [[META9]] = !{!"__dmr1024", [[META4]], i64 0} +//. diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c index 59b71cd355813..e602be7c59ae5 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c @@ -10,14 +10,14 @@ // CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-LE-NEXT: [[ENTRY:.*:]] // CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4]], <16 x i8> [[VC3]], <16 x i8> [[VC2]], <16 x i8> [[VC1]]) -// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]] +// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6:![0-9]+]] // CHECK-LE-NEXT: ret void 
// // CHECK-BE-LABEL: define dso_local void @test1( // CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-BE-NEXT: [[ENTRY:.*:]] // CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1]], <16 x i8> [[VC2]], <16 x i8> [[VC3]], <16 x i8> [[VC4]]) -// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]] +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6:![0-9]+]] // CHECK-BE-NEXT: ret void // // CHECK-LE-NOOPT-LABEL: define dso_local void @test1( @@ -70,14 +70,14 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec // CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-LE-NEXT: [[ENTRY:.*:]] // CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2]], <16 x i8> [[VC1]]) -// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8:![0-9]+]] // CHECK-LE-NEXT: ret void // // CHECK-BE-LABEL: define dso_local void @test2( // CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-BE-NEXT: [[ENTRY:.*:]] // CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1]], <16 x i8> [[VC2]]) -// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8:![0-9]+]] // CHECK-BE-NEXT: ret void // // CHECK-LE-NOOPT-LABEL: define dso_local void @test2( @@ -120,17 +120,17 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, *((__vector_pair *)resp) = res; } //. -// CHECK-LE: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-LE: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0} -// CHECK-LE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-LE: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-LE: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK-LE: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-LE: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// CHECK-LE: [[__VECTOR_QUAD_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-LE: [[META7]] = !{!"__vector_quad", [[META4]], i64 0} +// CHECK-LE: [[__VECTOR_PAIR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK-LE: [[META9]] = !{!"__vector_pair", [[META4]], i64 0} //. 
-// CHECK-BE: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-BE: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0} -// CHECK-BE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-BE: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-BE: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK-BE: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-BE: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// CHECK-BE: [[__VECTOR_QUAD_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-BE: [[META7]] = !{!"__vector_quad", [[META4]], i64 0} +// CHECK-BE: [[__VECTOR_PAIR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK-BE: [[META9]] = !{!"__vector_pair", [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c index d8306a74ad2e9..585d8bac57181 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c @@ -8,17 +8,17 @@ // CHECK-LABEL: define dso_local void @test_dmxvi8gerx4( // CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6:![0-9]+]] +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]] // CHECK-NEXT: ret void // // AIX-LABEL: define void 
@test_dmxvi8gerx4( // AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2:![0-9]+]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) -// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6:![0-9]+]] +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]] // AIX-NEXT: ret void // void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -31,17 +31,17 @@ void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned // CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4( // CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void 
@test_pmdmxvi8gerx4( // AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -54,19 +54,19 @@ void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigne // CHECK-LABEL: define dso_local void @test_dmxvi8gerx4pp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], 
align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxvi8gerx4pp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -79,19 +79,19 @@ void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigne // CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4pp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: 
[[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_pmdmxvi8gerx4pp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -104,19 +104,19 @@ void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsig // CHECK-LABEL: define dso_local void @test_dmxvi8gerx4spp( // CHECK-SAME: ptr noundef readonly captures(none) 
[[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxvi8gerx4spp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], 
align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -129,19 +129,19 @@ void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsign // CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4spp( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_pmdmxvi8gerx4spp( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] -// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 
128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -185,7 +185,7 @@ void test_dmf_basic(char *p, char *res1, char *res2) { // CHECK-LABEL: define dso_local void @test_dmf_basic2( // CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) // CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128 // CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128 @@ -195,7 +195,7 @@ void test_dmf_basic(char *p, char *res1, char *res2) { // AIX-LABEL: define void @test_dmf_basic2( // AIX-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) 
[[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] +// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA10:![0-9]+]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) // AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128 // AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128 @@ -212,19 +212,19 @@ void test_dmf_basic2(char *p1, char *res1, char *res2, // CHECK-LABEL: define dso_local void @test_dmsha2hash( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmsha2hash( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) 
[[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA6]] -// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA8]] +// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1) -// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_dmsha2hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) { @@ -237,17 +237,17 @@ void test_dmsha2hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char // CHECK-LABEL: define dso_local void @test_dmsha3hash( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA9:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA11:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> [[TMP0]], i32 4) -// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA9]] +// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA11]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmsha3hash( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA9:![0-9]+]] +// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA11:![0-9]+]] // AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> [[TMP0]], i32 4) -// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA9]] +// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA11]] // AIX-NEXT: ret void // void test_dmsha3hash(unsigned char *vdmrpp, unsigned char *resp) { @@ -259,17 +259,17 @@ void test_dmsha3hash(unsigned char *vdmrpp, unsigned char *resp) { // CHECK-LABEL: define dso_local void @test_dmxxshapad( // CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 5) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // CHECK-NEXT: ret void // // AIX-LABEL: define void @test_dmxxshapad( // AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIX-NEXT: [[ENTRY:.*:]] -// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 
x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 5) -// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]] // AIX-NEXT: ret void // void test_dmxxshapad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) { @@ -278,23 +278,23 @@ void test_dmxxshapad(unsigned char *vdmrp, vector unsigned char vc, unsigned cha *((__dmr1024 *)resp) = vdmr; } //. -// CHECK: [[__VECTOR_PAIR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[__DMR1024_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"__dmr1024", [[META4]], i64 0} -// CHECK: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} -// CHECK: [[__DMR2048_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// CHECK: [[META10]] = !{!"__dmr2048", [[META4]], i64 0} +// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// CHECK: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[META9]] = !{!"__dmr1024", [[META4]], i64 0} +// CHECK: [[CHAR_TBAA10]] = !{[[META4]], [[META4]], i64 0} +// CHECK: [[__DMR2048_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"__dmr2048", [[META4]], i64 0} //. 
-// AIX: [[__VECTOR_PAIR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// AIX: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0} -// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// AIX: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // AIX: [[META5]] = !{!"Simple C/C++ TBAA"} -// AIX: [[__DMR1024_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// AIX: [[META7]] = !{!"__dmr1024", [[META4]], i64 0} -// AIX: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} -// AIX: [[__DMR2048_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// AIX: [[META10]] = !{!"__dmr2048", [[META4]], i64 0} +// AIX: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// AIX: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// AIX: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// AIX: [[META9]] = !{!"__dmr1024", [[META4]], i64 0} +// AIX: [[CHAR_TBAA10]] = !{[[META4]], [[META4]], i64 0} +// AIX: [[__DMR2048_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// AIX: [[META12]] = !{!"__dmr2048", [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c index 5c7b222cb618e..74f9714446e00 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c @@ -8,7 +8,7 @@ // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6:![0-9]+]] // CHECK-NEXT: ret void // void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -45,7 +45,7 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] +// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8:![0-9]+]] // CHECK-NEXT: ret void // void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -75,9 +75,9 @@ void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-LABEL: 
define dso_local void @test5( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP0]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -90,9 +90,9 @@ void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-LABEL: define dso_local void @test6( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP0]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -106,7 +106,7 @@ void test6(unsigned 
char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -120,7 +120,7 @@ void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -134,7 +134,7 @@ void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x 
i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -148,7 +148,7 @@ void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -162,7 +162,7 @@ void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -176,7 +176,7 @@ void 
test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -190,7 +190,7 @@ void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -203,9 +203,9 @@ void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test14( // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -219,7 +219,7 @@ void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -233,7 +233,7 @@ void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 
0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -247,7 +247,7 @@ void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -261,7 +261,7 @@ void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -275,7 
+275,7 @@ void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -289,7 +289,7 @@ void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -302,9 +302,9 @@ void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test21( // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) 
[[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -317,9 +317,9 @@ void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test22( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -332,9 +332,9 @@ void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test23( // CHECK-SAME: ptr noundef readonly 
captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -347,9 +347,9 @@ void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test24( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -362,9 +362,9 @@ 
void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test25( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -377,9 +377,9 @@ void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test26( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa 
[[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -392,9 +392,9 @@ void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test27( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -407,9 +407,9 @@ void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test28( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], 
<16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -422,9 +422,9 @@ void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test29( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -437,9 +437,9 @@ void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test30( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// 
CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -452,9 +452,9 @@ void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test31( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -467,9 +467,9 @@ void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test32( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly 
captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -482,9 +482,9 @@ void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test33( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -497,9 +497,9 @@ void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test34( // 
CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -512,9 +512,9 @@ void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test35( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned 
char *resp) { @@ -527,9 +527,9 @@ void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test36( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -542,9 +542,9 @@ void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test37( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] 
+// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -557,9 +557,9 @@ void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test38( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -572,9 +572,9 @@ void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test39( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 
x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -587,9 +587,9 @@ void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test40( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -602,9 +602,9 @@ void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test41( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], 
align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -617,9 +617,9 @@ void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test42( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -632,9 +632,9 @@ void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test43( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly 
captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -647,9 +647,9 @@ void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test44( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -662,9 +662,9 @@ void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local 
void @test45( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -677,9 +677,9 @@ void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test46( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test46(unsigned char *vqp, unsigned 
char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -692,9 +692,9 @@ void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test47( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -707,10 +707,10 @@ void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test48( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -723,10 +723,10 @@ void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test49( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -739,10 +739,10 @@ void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test50( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x 
i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -755,10 +755,10 @@ void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test51( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: 
store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -771,10 +771,10 @@ void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test52( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -787,10 +787,10 @@ void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test53( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr 
#[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -803,10 +803,10 @@ void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test54( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] 
+// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -819,10 +819,10 @@ void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test55( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -836,7 +836,7 @@ void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC]], 
<16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -850,7 +850,7 @@ void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -863,9 +863,9 @@ void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test58( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, 
!tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -878,9 +878,9 @@ void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test59( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -893,9 +893,9 @@ void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test60( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail 
call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -908,9 +908,9 @@ void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test61( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -923,9 +923,9 @@ void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test62( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, 
!tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -938,9 +938,9 @@ void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test63( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -953,9 +953,9 @@ void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test64( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef 
[[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -968,9 +968,9 @@ void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns // CHECK-LABEL: define dso_local void @test65( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1079,11 +1079,11 @@ void test72(const __vector_pair *vpp, 
__vector_pair *vp2) { // CHECK-LABEL: define dso_local void @test73( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 8 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1096,10 +1096,10 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v // CHECK-LABEL: define dso_local void @test74( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store 
<512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1112,11 +1112,11 @@ void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v // CHECK-LABEL: define dso_local void @test75( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], i64 noundef [[OFFS:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFS]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1130,7 +1130,7 @@ void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect // CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 
x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8]] // CHECK-NEXT: ret void // void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1256,11 +1256,11 @@ void test84(const __vector_pair *vpp, __vector_pair *vp2) { // CHECK-LABEL: define dso_local void @test85( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 8 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1273,10 +1273,10 @@ void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v // CHECK-LABEL: define dso_local void @test86( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1289,11 +1289,11 @@ void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v // CHECK-LABEL: define dso_local void @test87( // CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], i64 noundef [[OFFS:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFS]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6]] // CHECK-NEXT: ret void // void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector 
unsigned char vc, unsigned char *resp) { @@ -1303,10 +1303,10 @@ void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect *((__vector_quad *)resp) = vq; } //. -// CHECK: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +// CHECK: [[__VECTOR_QUAD_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__vector_quad", [[META4]], i64 0} +// CHECK: [[__VECTOR_PAIR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[META9]] = !{!"__vector_pair", [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c index 1f0b3d4a560e7..c943649c76abf 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c @@ -57,7 +57,7 @@ DEFINE_STRUCT(bool64) // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP1]], i64 0) @@ 
-74,7 +74,7 @@ vbool32_t read_bool32(struct struct_bool32 *s) { // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 -// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: ret void // void write_bool32(struct struct_bool32 *s, vbool32_t x) { @@ -85,7 +85,7 @@ void write_bool32(struct struct_bool32 *s, vbool32_t x) { // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv1i1.nxv8i1( [[TMP1]], i64 0) @@ -102,14 +102,14 @@ vbool64_t read_bool64(struct struct_bool64 *s) { // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 -// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: ret void // void write_bool64(struct struct_bool64 *s, vbool64_t x) { s->y[0] = x; } //. 
-// CHECK-128: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-128: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK-128: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-128: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c index b92e6dff31748..71d6f469ded39 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c @@ -71,7 +71,7 @@ DEFINE_STRUCT(bool64) // CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v1i64( poison, <1 x i64> [[TMP0]], i64 0) // CHECK-64-NEXT: ret [[CAST_SCALABLE]] // @@ -79,7 +79,7 @@ DEFINE_STRUCT(bool64) // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // @@ -87,7 +87,7 @@ DEFINE_STRUCT(bool64) // CHECK-256-SAME: ptr 
noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v4i64( poison, <4 x i64> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -100,7 +100,7 @@ vint64m1_t read_int64m1(struct struct_int64m1 *s) { // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[X]], i64 0) // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: ret void // // CHECK-128-LABEL: define dso_local void @write_int64m1( @@ -108,7 +108,7 @@ vint64m1_t read_int64m1(struct struct_int64m1 *s) { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64( [[X]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_int64m1( @@ -116,7 +116,7 @@ vint64m1_t read_int64m1(struct struct_int64m1 *s) { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[X]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// 
CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: ret void // void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { @@ -131,7 +131,7 @@ void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { // CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v1f64( poison, <1 x double> [[TMP0]], i64 0) // CHECK-64-NEXT: ret [[CAST_SCALABLE]] // @@ -139,7 +139,7 @@ void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v2f64( poison, <2 x double> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // @@ -147,7 +147,7 @@ void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { // CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load 
<4 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v4f64( poison, <4 x double> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -160,7 +160,7 @@ vfloat64m1_t read_float64m1(struct struct_float64m1 *s) { // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64( [[X]], i64 0) // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-64-NEXT: store <1 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: store <1 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: ret void // // CHECK-128-LABEL: define dso_local void @write_float64m1( @@ -168,7 +168,7 @@ vfloat64m1_t read_float64m1(struct struct_float64m1 *s) { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64( [[X]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_float64m1( @@ -176,7 +176,7 @@ vfloat64m1_t read_float64m1(struct struct_float64m1 *s) { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[X]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: ret void // void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { @@ -191,7 
+191,7 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { // CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] @@ -200,7 +200,7 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v16i8( poison, <16 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] @@ -209,7 +209,7 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { // CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v32i8( poison, <32 x i8> [[TMP0]], i64 0) // 
CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] @@ -224,7 +224,7 @@ vbool1_t read_bool1(struct struct_bool1 *s) { // CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8( [[TMP0]], i64 0) // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: ret void // // CHECK-128-LABEL: define dso_local void @write_bool1( @@ -233,7 +233,7 @@ vbool1_t read_bool1(struct struct_bool1 *s) { // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8( [[TMP0]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_bool1( @@ -242,22 +242,22 @@ vbool1_t read_bool1(struct struct_bool1 *s) { // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[TMP0]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: ret void // void write_bool1(struct struct_bool1 *s, vbool1_t x) { s->y[0] = x; } //. 
-// CHECK-64: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-64: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK-64: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK-64: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-64: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-64: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. -// CHECK-128: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-128: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK-128: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-128: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. -// CHECK-256: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-256: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK-256: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK-256: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-256: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-256: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. 
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c index 4517b52aefdfd..fd500f014da82 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c @@ -124,7 +124,7 @@ vbool32_t to_vbool32_t(fixed_bool32_t type) { // CHECK-LABEL: define dso_local @to_vint32m1_t__from_gnu_int32m1_t( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -136,7 +136,7 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], [[TYPE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE]], i64 0) -// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA10]] // CHECK-NEXT: ret void // gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { @@ -146,7 +146,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { // CHECK-LABEL: define dso_local @to_fixed_int32m1_t__from_gnu_int32m1_t( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA10]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -158,14 +158,14 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE]], i64 0) -// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA10]] // CHECK-NEXT: ret void // gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) { return type; } //. -// CHECK: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. 
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c index f3b91b23a73e4..f6c734f0dba66 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c @@ -44,14 +44,14 @@ fixed_bool32_t global_bool32; // CHECK-64-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[V]], i64 0) -// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-64-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_global_i64( // CHECK-256-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[V]], i64 0) -// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA10:![0-9]+]] // CHECK-256-NEXT: ret void // void write_global_i64(vint64m1_t v) { global_i64 = v; } @@ -61,7 +61,7 @@ void write_global_i64(vint64m1_t v) { global_i64 = v; } // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_global_bool1( @@ -69,7 +69,7 @@ void write_global_i64(vint64m1_t v) { global_i64 = v; 
} // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: ret void // void write_global_bool1(vbool1_t v) { global_bool1 = v; } @@ -79,7 +79,7 @@ void write_global_bool1(vbool1_t v) { global_bool1 = v; } // CHECK-64-NEXT: [[ENTRY:.*:]] // CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-64-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_global_bool4( @@ -87,7 +87,7 @@ void write_global_bool1(vbool1_t v) { global_bool1 = v; } // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-256-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: ret void // void write_global_bool4(vbool4_t v) { global_bool4 = v; } @@ -99,7 +99,7 @@ void write_global_bool4(vbool4_t v) { global_bool4 = v; } // CHECK-256-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[V]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) -// CHECK-256-NEXT: store <1 x i8> 
[[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: store <1 x i8> [[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: ret void // void write_global_bool32(vbool32_t v) { global_bool32 = v; } @@ -112,14 +112,14 @@ void write_global_bool32(vbool32_t v) { global_bool32 = v; } // CHECK-64-LABEL: define dso_local @read_global_i64( // CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-64-NEXT: [[ENTRY:.*:]] -// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v1i64( poison, <1 x i64> [[TMP0]], i64 0) // CHECK-64-NEXT: ret [[CAST_SCALABLE]] // // CHECK-256-LABEL: define dso_local @read_global_i64( // CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-256-NEXT: [[ENTRY:.*:]] -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v4i64( poison, <4 x i64> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -128,7 +128,7 @@ vint64m1_t read_global_i64() { return global_i64; } // CHECK-64-LABEL: define dso_local @read_global_bool1( // CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-64-NEXT: [[ENTRY:.*:]] -// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] @@ -136,7 +136,7 
@@ vint64m1_t read_global_i64() { return global_i64; } // CHECK-256-LABEL: define dso_local @read_global_bool1( // CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-256-NEXT: [[ENTRY:.*:]] -// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v32i8( poison, <32 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] @@ -146,7 +146,7 @@ vbool1_t read_global_bool1() { return global_bool1; } // CHECK-64-LABEL: define dso_local @read_global_bool4( // CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-64-NEXT: [[ENTRY:.*:]] -// CHECK-64-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA6]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA10]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[TMP0]], i64 0) // CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] @@ -154,7 +154,7 @@ vbool1_t read_global_bool1() { return global_bool1; } // CHECK-256-LABEL: define dso_local @read_global_bool4( // CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-256-NEXT: [[ENTRY:.*:]] -// CHECK-256-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] @@ -165,7 +165,7 @@ vbool4_t read_global_bool4() { return global_bool4; } // CHECK-256-LABEL: define dso_local 
@read_global_bool32( // CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-256-NEXT: [[ENTRY:.*:]] -// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA6]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA10]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP1]], i64 0) @@ -174,11 +174,11 @@ vbool4_t read_global_bool4() { return global_bool4; } vbool32_t read_global_bool32() { return global_bool32; } #endif //. -// CHECK-64: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-64: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK-64: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK-64: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-64: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-64: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. -// CHECK-256: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-256: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK-256: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK-256: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-256: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-256: [[CHAR_TBAA10]] = !{[[META8]], [[META8]], i64 0} //. 
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c index d25b8d84aa2d5..2a20607123af9 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c @@ -14,124 +14,124 @@ volatile vector unsigned long long vul; // CHECK-LABEL: define dso_local void @test( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 // CHECK-NEXT: [[ADD_I:%.*]] = add nsw i128 [[TMP3]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[ADD_I]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to i128 // CHECK-NEXT: [[TMP9:%.*]] = tail call i128 @llvm.s390.vaccq(i128 [[TMP7]], i128 [[TMP8]]) // CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8> -// CHECK-NEXT: store 
volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP11]] to i128 // CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP12]] to i128 // CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP13]] to i128 // CHECK-NEXT: [[TMP17:%.*]] = tail call i128 @llvm.s390.vacq(i128 [[TMP14]], i128 [[TMP15]], i128 [[TMP16]]) // CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP19]] to i128 // CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x 
i8> [[TMP20]] to i128 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP21]] to i128 // CHECK-NEXT: [[TMP25:%.*]] = tail call i128 @llvm.s390.vacccq(i128 [[TMP22]], i128 [[TMP23]], i128 [[TMP24]]) // CHECK-NEXT: [[TMP26:%.*]] = bitcast i128 [[TMP25]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i8> [[TMP27]] to i128 // CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP28]] to i128 // CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i128 [[TMP29]], [[TMP30]] // CHECK-NEXT: [[TMP31:%.*]] = bitcast i128 [[SUB_I]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i8> [[TMP32]] to i128 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <16 x i8> [[TMP33]] to i128 // CHECK-NEXT: [[TMP36:%.*]] = tail call i128 @llvm.s390.vscbiq(i128 [[TMP34]], i128 [[TMP35]]) // CHECK-NEXT: [[TMP37:%.*]] = bitcast i128 [[TMP36]] to <16 x i8> -// CHECK-NEXT: store 
volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i8> [[TMP38]] to i128 // CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i8> [[TMP39]] to i128 // CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i8> [[TMP40]] to i128 // CHECK-NEXT: [[TMP44:%.*]] = tail call i128 @llvm.s390.vsbiq(i128 [[TMP41]], i128 [[TMP42]], i128 [[TMP43]]) // CHECK-NEXT: [[TMP45:%.*]] = bitcast i128 [[TMP44]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP49:%.*]] = bitcast <16 x i8> [[TMP46]] to i128 // CHECK-NEXT: [[TMP50:%.*]] = bitcast <16 
x i8> [[TMP47]] to i128 // CHECK-NEXT: [[TMP51:%.*]] = bitcast <16 x i8> [[TMP48]] to i128 // CHECK-NEXT: [[TMP52:%.*]] = tail call i128 @llvm.s390.vsbcbiq(i128 [[TMP49]], i128 [[TMP50]], i128 [[TMP51]]) // CHECK-NEXT: [[TMP53:%.*]] = bitcast i128 [[TMP52]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP56:%.*]] = tail call i128 @llvm.s390.vsumqf(<4 x i32> [[TMP54]], <4 x i32> [[TMP55]]) // CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP60:%.*]] = tail call i128 @llvm.s390.vsumqg(<2 x i64> [[TMP58]], <2 x i64> [[TMP59]]) // CHECK-NEXT: [[TMP61:%.*]] = bitcast i128 [[TMP60]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 
x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP64:%.*]] = tail call i128 @llvm.s390.vgfmg(<2 x i64> [[TMP62]], <2 x i64> [[TMP63]]) // CHECK-NEXT: [[TMP65:%.*]] = bitcast i128 [[TMP64]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i8> [[TMP68]] to i128 // CHECK-NEXT: [[TMP70:%.*]] = tail call i128 @llvm.s390.vgfmag(<2 x i64> [[TMP66]], <2 x i64> [[TMP67]], i128 [[TMP69]]) // CHECK-NEXT: [[TMP71:%.*]] = bitcast i128 [[TMP70]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa 
[[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP75:%.*]] = bitcast <16 x i8> [[TMP74]] to i128 // CHECK-NEXT: [[TMP76:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP72]], <2 x i64> [[TMP73]], i128 [[TMP75]], i32 0) // CHECK-NEXT: [[TMP77:%.*]] = bitcast i128 [[TMP76]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP81:%.*]] = bitcast <16 x i8> [[TMP80]] to i128 // CHECK-NEXT: [[TMP82:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP78]], <2 x i64> [[TMP79]], i128 [[TMP81]], i32 4) // CHECK-NEXT: [[TMP83:%.*]] = bitcast i128 [[TMP82]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: 
store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP87:%.*]] = bitcast <16 x i8> [[TMP86]] to i128 // CHECK-NEXT: [[TMP88:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP84]], <2 x i64> [[TMP85]], i128 [[TMP87]], i32 8) // CHECK-NEXT: [[TMP89:%.*]] = bitcast i128 [[TMP88]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP93:%.*]] = bitcast <16 x i8> [[TMP92]] to i128 // CHECK-NEXT: [[TMP94:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP90]], <2 x i64> [[TMP91]], i128 [[TMP93]], i32 12) // CHECK-NEXT: [[TMP95:%.*]] = bitcast i128 [[TMP94]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr 
@vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[TMP98:%.*]] = tail call <2 x i64> @llvm.s390.vbperm(<16 x i8> [[TMP96]], <16 x i8> [[TMP97]]) -// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test(void) { @@ -159,7 +159,7 @@ void test(void) { vul = vec_bperm_u128(vuc, vuc); } //. -// CHECK: [[CHAR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META5:![0-9]+]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA7]] = !{[[META5]], [[META5]], i64 0} //. 
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c index 5f3b0ec546462..6bd61f9c130bd 100644 --- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c +++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c @@ -17,7 +17,7 @@ __int128 Des __attribute__((aligned(16))); // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2:![0-9]+]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6:![0-9]+]] // CHECK-NEXT: ret void // __int128 f1() { @@ -29,7 +29,7 @@ __int128 f1() { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16 // CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 16 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f2() { @@ -40,7 +40,7 @@ __int128 f2() { // CHECK-LABEL: define dso_local void @f3( // CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16 // CHECK-NEXT: ret void // @@ -62,9 +62,9 @@ void f4() { // CHECK-LABEL: define dso_local void @f5( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f5() { @@ -77,7 +77,7 @@ __int128 f5() { // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16 // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f6() { @@ -88,7 +88,7 @@ __int128 f6() { // CHECK-LABEL: define dso_local noundef zeroext i1 @f7( // CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16 // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 @@ -128,10 +128,10 @@ _Bool f8() { // CHECK-LABEL: define dso_local void @f9( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: 
[[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f9() { @@ -141,10 +141,10 @@ __int128 f9() { // CHECK-LABEL: define dso_local void @f10( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f10() { @@ -154,10 +154,10 @@ __int128 f10() { // CHECK-LABEL: define dso_local void @f11( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f11() 
{ @@ -167,10 +167,10 @@ __int128 f11() { // CHECK-LABEL: define dso_local void @f12( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f12() { @@ -180,10 +180,10 @@ __int128 f12() { // CHECK-LABEL: define dso_local void @f13( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f13() { @@ -193,11 +193,11 @@ __int128 f13() { // CHECK-LABEL: define dso_local void @f14( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa 
[[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f14() { @@ -207,9 +207,9 @@ __int128 f14() { // CHECK-LABEL: define dso_local void @f15( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f15() { @@ -219,9 +219,9 @@ __int128 f15() { // CHECK-LABEL: define dso_local void @f16( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa 
[[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f16() { @@ -231,9 +231,9 @@ __int128 f16() { // CHECK-LABEL: define dso_local void @f17( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f17() { @@ -243,9 +243,9 @@ __int128 f17() { // CHECK-LABEL: define dso_local void @f18( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f18() { @@ -255,9 +255,9 @@ __int128 f18() { // CHECK-LABEL: define dso_local void @f19( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, 
ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f19() { @@ -267,17 +267,17 @@ __int128 f19() { // CHECK-LABEL: define dso_local void @f20( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f20() { return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst); } //. -// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__INT128_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__int128", [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c index 3ac5959a29dcb..332d7bd21faf5 100644 --- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c +++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c @@ -22,7 +22,7 @@ __int128 Des; // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2:![0-9]+]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6:![0-9]+]] // CHECK-NEXT: ret void // __int128 f1() { @@ -34,7 +34,7 @@ __int128 f1() { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8 // CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 8 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f2() { @@ -45,7 +45,7 @@ __int128 f2() { // CHECK-LABEL: define dso_local void @f3( // CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8 // CHECK-NEXT: ret void // @@ -67,9 +67,9 @@ void f4() { // CHECK-LABEL: define dso_local void @f5( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = 
load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f5() { @@ -82,7 +82,7 @@ __int128 f5() { // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8 // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f6() { @@ -93,7 +93,7 @@ __int128 f6() { // CHECK-LABEL: define dso_local noundef zeroext i1 @f7( // CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 8 // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 8 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 @@ -133,10 +133,10 @@ _Bool f8() { // CHECK-LABEL: define dso_local void @f9( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] 
seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f9() { @@ -146,10 +146,10 @@ __int128 f9() { // CHECK-LABEL: define dso_local void @f10( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f10() { @@ -159,10 +159,10 @@ __int128 f10() { // CHECK-LABEL: define dso_local void @f11( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f11() { @@ -172,10 +172,10 @@ __int128 f11() { // CHECK-LABEL: 
define dso_local void @f12( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f12() { @@ -185,10 +185,10 @@ __int128 f12() { // CHECK-LABEL: define dso_local void @f13( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f13() { @@ -198,11 +198,11 @@ __int128 f13() { // CHECK-LABEL: define dso_local void @f14( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr 
@Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f14() { @@ -212,9 +212,9 @@ __int128 f14() { // CHECK-LABEL: define dso_local void @f15( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f15() { @@ -224,9 +224,9 @@ __int128 f15() { // CHECK-LABEL: define dso_local void @f16( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f16() { @@ -236,9 
+236,9 @@ __int128 f16() { // CHECK-LABEL: define dso_local void @f17( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f17() { @@ -248,9 +248,9 @@ __int128 f17() { // CHECK-LABEL: define dso_local void @f18( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f18() { @@ -260,9 +260,9 @@ __int128 f18() { // CHECK-LABEL: define dso_local void @f19( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = 
atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f19() { @@ -272,17 +272,17 @@ __int128 f19() { // CHECK-LABEL: define dso_local void @f20( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f20() { return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst); } //. -// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__INT128_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__int128", [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c index 601bd7fa16153..ba77cdb29305a 100644 --- a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c +++ b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c @@ -13,9 +13,9 @@ __int128 OldVal __attribute__((aligned(16))); // CHECK-LABEL: define dso_local void @f1( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f1() { @@ -25,9 +25,9 @@ __int128 f1() { // CHECK-LABEL: define dso_local void @f2( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f2() { @@ -37,9 +37,9 @@ __int128 f2() { // CHECK-LABEL: define dso_local void @f3( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f3() { @@ -49,9 +49,9 @@ __int128 f3() { // CHECK-LABEL: define dso_local void @f4( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f4() { @@ -61,9 +61,9 @@ __int128 f4() { // CHECK-LABEL: define dso_local void @f5( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa 
[[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f5() { @@ -73,9 +73,9 @@ __int128 f5() { // CHECK-LABEL: define dso_local void @f6( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f6() { @@ -85,10 +85,10 @@ __int128 f6() { // CHECK-LABEL: define dso_local void @f7( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f7() { @@ -98,10 +98,10 @@ __int128 f7() { // CHECK-LABEL: define dso_local void @f8( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f8() { @@ -111,10 +111,10 @@ __int128 f8() { // CHECK-LABEL: define dso_local void @f9( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f9() { @@ -124,10 +124,10 @@ __int128 f9() { // CHECK-LABEL: define dso_local void @f10( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 
[[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f10() { @@ -137,10 +137,10 @@ __int128 f10() { // CHECK-LABEL: define dso_local void @f11( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f11() { @@ -150,11 +150,11 @@ __int128 f11() { // CHECK-LABEL: define dso_local void @f12( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f12() { @@ -164,8 +164,8 @@ __int128 f12() { // CHECK-LABEL: define dso_local zeroext i1 @f13( // 
CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 // CHECK-NEXT: ret i1 [[TMP3]] @@ -177,11 +177,11 @@ _Bool f13() { // CHECK-LABEL: define dso_local void @f14( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f14() { @@ -191,9 +191,9 @@ __int128 f14() { // CHECK-LABEL: define dso_local void @f15( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, 
align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f15() { @@ -213,9 +213,9 @@ void f16() { // CHECK-LABEL: define dso_local void @f17( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: ret void // __int128 f17() { @@ -227,8 +227,8 @@ __int128 f17() { // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[T_ADDR:%.*]] = alloca i128, align 8 -// CHECK-NEXT: [[T:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[__INT128_TBAA2]] -// CHECK-NEXT: store i128 [[T]], ptr [[T_ADDR]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[T:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[__INT128_TBAA6]] +// CHECK-NEXT: store i128 [[T]], ptr [[T_ADDR]], align 8, !tbaa [[__INT128_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[T_ADDR]], i128 [[T]], i128 [[T]] seq_cst seq_cst, align 16 // CHECK-NEXT: ret void // @@ -236,8 +236,8 @@ void f18(__int128 t) { __sync_bool_compare_and_swap(({int x = 1; &t;}), 
t, t); } //. -// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__INT128_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__int128", [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/SystemZ/zvector2.c b/clang/test/CodeGen/SystemZ/zvector2.c index f00fcdd52c401..b7994675fab50 100644 --- a/clang/test/CodeGen/SystemZ/zvector2.c +++ b/clang/test/CodeGen/SystemZ/zvector2.c @@ -8,8 +8,8 @@ volatile vector bool int bi; // CHECK-LABEL: define dso_local void @test_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3:![0-9]+]] -// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7:![0-9]+]] +// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_assign (void) @@ -20,8 +20,8 @@ void test_assign (void) // CHECK-LABEL: define dso_local void @test_pos( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_pos (void) @@ -32,9 +32,9 @@ void test_pos (void) // CHECK-LABEL: define dso_local void 
@test_neg( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[TMP0]] -// CHECK-NEXT: store volatile <4 x float> [[FNEG]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[FNEG]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_neg (void) @@ -45,9 +45,9 @@ void test_neg (void) // CHECK-LABEL: define dso_local void @test_preinc( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[INC:%.*]] = fadd <4 x float> [[TMP0]], splat (float 1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_preinc (void) @@ -58,9 +58,9 @@ void test_preinc (void) // CHECK-LABEL: define dso_local void @test_postinc( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[INC:%.*]] = fadd <4 x float> [[TMP0]], splat (float 1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_postinc (void) @@ -71,9 +71,9 @@ void test_postinc (void) // 
CHECK-LABEL: define dso_local void @test_predec( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[DEC:%.*]] = fadd <4 x float> [[TMP0]], splat (float -1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_predec (void) @@ -84,9 +84,9 @@ void test_predec (void) // CHECK-LABEL: define dso_local void @test_postdec( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[DEC:%.*]] = fadd <4 x float> [[TMP0]], splat (float -1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_postdec (void) @@ -97,10 +97,10 @@ void test_postdec (void) // CHECK-LABEL: define dso_local void @test_add( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x 
float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_add (void) @@ -111,10 +111,10 @@ void test_add (void) // CHECK-LABEL: define dso_local void @test_add_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_add_assign (void) @@ -125,10 +125,10 @@ void test_add_assign (void) // CHECK-LABEL: define dso_local void @test_sub( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_sub (void) @@ -139,10 +139,10 @@ void test_sub (void) // CHECK-LABEL: define 
dso_local void @test_sub_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[TMP1]], [[TMP0]] -// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_sub_assign (void) @@ -153,10 +153,10 @@ void test_sub_assign (void) // CHECK-LABEL: define dso_local void @test_mul( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_mul (void) @@ -167,10 +167,10 @@ void test_mul (void) // CHECK-LABEL: define dso_local void @test_mul_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, 
ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_mul_assign (void) @@ -181,10 +181,10 @@ void test_mul_assign (void) // CHECK-LABEL: define dso_local void @test_div( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_div (void) @@ -195,10 +195,10 @@ void test_div (void) // CHECK-LABEL: define dso_local void @test_div_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[DIV:%.*]] = fdiv 
<4 x float> [[TMP1]], [[TMP0]] -// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_div_assign (void) @@ -209,11 +209,11 @@ void test_div_assign (void) // CHECK-LABEL: define dso_local void @test_cmpeq( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_cmpeq (void) @@ -224,11 +224,11 @@ void test_cmpeq (void) // CHECK-LABEL: define dso_local void @test_cmpne( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[CMP:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] +// 
CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_cmpne (void) @@ -239,11 +239,11 @@ void test_cmpne (void) // CHECK-LABEL: define dso_local void @test_cmpge( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[CMP:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_cmpge (void) @@ -254,11 +254,11 @@ void test_cmpge (void) // CHECK-LABEL: define dso_local void @test_cmpgt( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_cmpgt (void) @@ 
-269,11 +269,11 @@ void test_cmpgt (void) // CHECK-LABEL: define dso_local void @test_cmple( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_cmple (void) @@ -284,11 +284,11 @@ void test_cmple (void) // CHECK-LABEL: define dso_local void @test_cmplt( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA7]] // CHECK-NEXT: ret void // void test_cmplt (void) @@ -297,7 +297,7 @@ void test_cmplt (void) } //. 
-// CHECK: [[CHAR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META5:![0-9]+]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA7]] = !{[[META5]], [[META5]], i64 0} //. diff --git a/clang/test/CodeGen/allow-ubsan-check.c b/clang/test/CodeGen/allow-ubsan-check.c index 8d30e29886046..1e128854d6a75 100644 --- a/clang/test/CodeGen/allow-ubsan-check.c +++ b/clang/test/CodeGen/allow-ubsan-check.c @@ -10,27 +10,27 @@ // CHECK-LABEL: define dso_local noundef i32 @div( // CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[Y]], 0, !nosanitize [[META2:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X]], -2147483648, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[Y]], -1, !nosanitize [[META2]] -// CHECK-NEXT: [[OR:%.*]] = or i1 [[TMP1]], [[TMP2]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[Y]], 0, !nosanitize [[META6:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X]], -2147483648, !nosanitize [[META6]] +// CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[Y]], -1, !nosanitize [[META6]] +// CHECK-NEXT: [[OR:%.*]] = or i1 [[TMP1]], [[TMP2]], !nosanitize [[META6]] // // 27 == SO_IntegerDivideByZero -// CHECK-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 27), !nosanitize [[META2]] -// CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP0]], [[TMP4]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 27), !nosanitize [[META6]] +// CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true, !nosanitize [[META6]] +// CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP0]], [[TMP4]], !nosanitize [[META6]] // // 41 == 
SO_SignedIntegerOverflow -// CHECK-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META2]] -// CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP8:%.*]] = or i1 [[OR]], [[TMP7]], !nosanitize [[META2]] -// CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META2]] -// CHECK-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[HANDLER_DIVREM_OVERFLOW:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META6]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META6]] +// CHECK-NEXT: [[TMP8:%.*]] = or i1 [[OR]], [[TMP7]], !nosanitize [[META6]] +// CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META6]] +// CHECK-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[HANDLER_DIVREM_OVERFLOW:.*]], !prof [[PROF7:![0-9]+]], !nosanitize [[META6]] // CHECK: [[HANDLER_DIVREM_OVERFLOW]]: -// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: tail call void @__ubsan_handle_divrem_overflow_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[TMP10]], i64 [[TMP11]]) #[[ATTR6:[0-9]+]], !nosanitize [[META2]] -// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META6]] +// CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META6]] +// CHECK-NEXT: tail call void @__ubsan_handle_divrem_overflow_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[TMP10]], i64 [[TMP11]]) #[[ATTR6:[0-9]+]], !nosanitize [[META6]] +// CHECK-NEXT: unreachable, !nosanitize [[META6]] // CHECK: [[CONT]]: // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] // CHECK-NEXT: ret i32 [[DIV]] @@ -38,21 +38,21 @@ // TR-LABEL: define dso_local noundef i32 @div( // TR-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] { // TR-NEXT: [[ENTRY:.*:]] -// TR-NEXT: [[TMP0:%.*]] = icmp ne i32 [[Y]], 0, !nosanitize [[META2:![0-9]+]] -// TR-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X]], -2147483648, !nosanitize [[META2]] -// TR-NEXT: [[TMP2:%.*]] = icmp ne i32 [[Y]], -1, !nosanitize [[META2]] -// TR-NEXT: [[OR:%.*]] = or i1 [[TMP1]], [[TMP2]], !nosanitize [[META2]] -// TR-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 27), !nosanitize [[META2]] -// TR-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true, !nosanitize [[META2]] -// TR-NEXT: [[TMP5:%.*]] = or i1 [[TMP0]], [[TMP4]], !nosanitize [[META2]] -// TR-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META2]] -// TR-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] -// TR-NEXT: [[TMP8:%.*]] = or i1 [[OR]], [[TMP7]], !nosanitize [[META2]] -// TR-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META2]] -// TR-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[TRAP:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// TR-NEXT: [[TMP0:%.*]] = icmp ne i32 [[Y]], 0, !nosanitize [[META6:![0-9]+]] +// TR-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X]], -2147483648, !nosanitize [[META6]] +// TR-NEXT: [[TMP2:%.*]] = icmp ne i32 [[Y]], -1, !nosanitize [[META6]] +// TR-NEXT: [[OR:%.*]] = or i1 [[TMP1]], [[TMP2]], !nosanitize [[META6]] +// TR-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 27), !nosanitize [[META6]] +// TR-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true, !nosanitize [[META6]] +// TR-NEXT: [[TMP5:%.*]] = or i1 [[TMP0]], [[TMP4]], !nosanitize [[META6]] +// TR-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META6]] +// TR-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META6]] +// TR-NEXT: [[TMP8:%.*]] = or i1 [[OR]], [[TMP7]], !nosanitize [[META6]] +// TR-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META6]] +// TR-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[TRAP:.*]], !prof 
[[PROF7:![0-9]+]], !nosanitize [[META6]] // TR: [[TRAP]]: -// TR-NEXT: tail call void @llvm.ubsantrap(i8 3) #[[ATTR7:[0-9]+]], !nosanitize [[META2]] -// TR-NEXT: unreachable, !nosanitize [[META2]] +// TR-NEXT: tail call void @llvm.ubsantrap(i8 3) #[[ATTR7:[0-9]+]], !nosanitize [[META6]] +// TR-NEXT: unreachable, !nosanitize [[META6]] // TR: [[CONT]]: // TR-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] // TR-NEXT: ret i32 [[DIV]] @@ -60,23 +60,23 @@ // REC-LABEL: define dso_local noundef i32 @div( // REC-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // REC-NEXT: [[ENTRY:.*:]] -// REC-NEXT: [[TMP0:%.*]] = icmp ne i32 [[Y]], 0, !nosanitize [[META2:![0-9]+]] -// REC-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X]], -2147483648, !nosanitize [[META2]] -// REC-NEXT: [[TMP2:%.*]] = icmp ne i32 [[Y]], -1, !nosanitize [[META2]] -// REC-NEXT: [[OR:%.*]] = or i1 [[TMP1]], [[TMP2]], !nosanitize [[META2]] -// REC-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 27), !nosanitize [[META2]] -// REC-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true, !nosanitize [[META2]] -// REC-NEXT: [[TMP5:%.*]] = or i1 [[TMP0]], [[TMP4]], !nosanitize [[META2]] -// REC-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META2]] -// REC-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] -// REC-NEXT: [[TMP8:%.*]] = or i1 [[OR]], [[TMP7]], !nosanitize [[META2]] -// REC-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META2]] -// REC-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[HANDLER_DIVREM_OVERFLOW:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// REC-NEXT: [[TMP0:%.*]] = icmp ne i32 [[Y]], 0, !nosanitize [[META6:![0-9]+]] +// REC-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X]], -2147483648, !nosanitize [[META6]] +// REC-NEXT: [[TMP2:%.*]] = icmp ne i32 [[Y]], -1, !nosanitize [[META6]] +// REC-NEXT: [[OR:%.*]] = or i1 [[TMP1]], [[TMP2]], !nosanitize [[META6]] +// REC-NEXT: [[TMP3:%.*]] = tail call i1 
@llvm.allow.ubsan.check(i8 27), !nosanitize [[META6]] +// REC-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true, !nosanitize [[META6]] +// REC-NEXT: [[TMP5:%.*]] = or i1 [[TMP0]], [[TMP4]], !nosanitize [[META6]] +// REC-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META6]] +// REC-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META6]] +// REC-NEXT: [[TMP8:%.*]] = or i1 [[OR]], [[TMP7]], !nosanitize [[META6]] +// REC-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META6]] +// REC-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[HANDLER_DIVREM_OVERFLOW:.*]], !prof [[PROF7:![0-9]+]], !nosanitize [[META6]] // REC: [[HANDLER_DIVREM_OVERFLOW]]: -// REC-NEXT: [[TMP10:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META2]] -// REC-NEXT: [[TMP11:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META2]] -// REC-NEXT: tail call void @__ubsan_handle_divrem_overflow(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[TMP10]], i64 [[TMP11]]) #[[ATTR6:[0-9]+]], !nosanitize [[META2]] -// REC-NEXT: br label %[[CONT]], !nosanitize [[META2]] +// REC-NEXT: [[TMP10:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META6]] +// REC-NEXT: [[TMP11:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META6]] +// REC-NEXT: tail call void @__ubsan_handle_divrem_overflow(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[TMP10]], i64 [[TMP11]]) #[[ATTR6:[0-9]+]], !nosanitize [[META6]] +// REC-NEXT: br label %[[CONT]], !nosanitize [[META6]] // REC: [[CONT]]: // REC-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] // REC-NEXT: ret i32 [[DIV]] @@ -88,45 +88,45 @@ int div(int x, int y) { // CHECK-LABEL: define dso_local i32 @null( // CHECK-SAME: ptr noundef readonly captures(address_is_null) [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META6]] // // 29 == SO_Null -// CHECK-NEXT: [[TMP1:%.*]] = tail call i1 
@llvm.allow.ubsan.check(i8 29), !nosanitize [[META2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 29), !nosanitize [[META6]] // CHECK-NEXT: [[DOTNOT1:%.*]] = and i1 [[TMP0]], [[TMP1]] -// CHECK-NEXT: br i1 [[DOTNOT1]], label %[[HANDLER_TYPE_MISMATCH:.*]], label %[[CONT:.*]], !prof [[PROF4:![0-9]+]], !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[DOTNOT1]], label %[[HANDLER_TYPE_MISMATCH:.*]], label %[[CONT:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META6]] // CHECK: [[HANDLER_TYPE_MISMATCH]]: -// CHECK-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META2]] -// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META6]] +// CHECK-NEXT: unreachable, !nosanitize [[META6]] // CHECK: [[CONT]]: -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // CHECK-NEXT: ret i32 [[TMP2]] // // TR-LABEL: define dso_local i32 @null( // TR-SAME: ptr noundef readonly captures(address_is_null) [[X:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // TR-NEXT: [[ENTRY:.*:]] -// TR-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META2]] -// TR-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 29), !nosanitize [[META2]] +// TR-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META6]] +// TR-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 29), !nosanitize [[META6]] // TR-NEXT: [[DOTNOT1:%.*]] = and i1 [[TMP0]], [[TMP1]] -// TR-NEXT: br i1 [[DOTNOT1]], label %[[TRAP:.*]], label %[[CONT:.*]], !prof [[PROF4:![0-9]+]], !nosanitize [[META2]] +// TR-NEXT: br i1 [[DOTNOT1]], label %[[TRAP:.*]], label %[[CONT:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META6]] // TR: [[TRAP]]: -// TR-NEXT: tail call void 
@llvm.ubsantrap(i8 22) #[[ATTR7]], !nosanitize [[META2]] -// TR-NEXT: unreachable, !nosanitize [[META2]] +// TR-NEXT: tail call void @llvm.ubsantrap(i8 22) #[[ATTR7]], !nosanitize [[META6]] +// TR-NEXT: unreachable, !nosanitize [[META6]] // TR: [[CONT]]: -// TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] +// TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // TR-NEXT: ret i32 [[TMP2]] // // REC-LABEL: define dso_local i32 @null( // REC-SAME: ptr noundef readonly captures(address_is_null) [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // REC-NEXT: [[ENTRY:.*:]] -// REC-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META2]] -// REC-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 29), !nosanitize [[META2]] +// REC-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META6]] +// REC-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 29), !nosanitize [[META6]] // REC-NEXT: [[DOTNOT1:%.*]] = and i1 [[TMP0]], [[TMP1]] -// REC-NEXT: br i1 [[DOTNOT1]], label %[[HANDLER_TYPE_MISMATCH:.*]], label %[[CONT:.*]], !prof [[PROF4:![0-9]+]], !nosanitize [[META2]] +// REC-NEXT: br i1 [[DOTNOT1]], label %[[HANDLER_TYPE_MISMATCH:.*]], label %[[CONT:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META6]] // REC: [[HANDLER_TYPE_MISMATCH]]: -// REC-NEXT: tail call void @__ubsan_handle_type_mismatch_v1(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META2]] -// REC-NEXT: br label %[[CONT]], !nosanitize [[META2]] +// REC-NEXT: tail call void @__ubsan_handle_type_mismatch_v1(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META6]] +// REC-NEXT: br label %[[CONT]], !nosanitize [[META6]] // REC: [[CONT]]: -// REC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] +// REC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // REC-NEXT: ret i32 [[TMP2]] // int null(int* x) { @@ -136,52 +136,52 @@ int 
null(int* x) { // CHECK-LABEL: define dso_local noundef i32 @overflow( // CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]]), !nosanitize [[META2]] -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]]), !nosanitize [[META6]] +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META6]] // // 41 == SO_SignedIntegerOverflow -// CHECK-NEXT: [[TMP2:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META6]] // CHECK-NEXT: [[DOTDEMORGAN:%.*]] = and i1 [[TMP1]], [[TMP2]] -// CHECK-NEXT: br i1 [[DOTDEMORGAN]], label %[[HANDLER_ADD_OVERFLOW:.*]], label %[[CONT:.*]], !prof [[PROF4]], !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[DOTDEMORGAN]], label %[[HANDLER_ADD_OVERFLOW:.*]], label %[[CONT:.*]], !prof [[PROF8]], !nosanitize [[META6]] // CHECK: [[HANDLER_ADD_OVERFLOW]]: -// CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: tail call void @__ubsan_handle_add_overflow_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[TMP3]], i64 [[TMP4]]) #[[ATTR6]], !nosanitize [[META2]] -// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META6]] +// CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META6]] +// CHECK-NEXT: tail call void @__ubsan_handle_add_overflow_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[TMP3]], i64 [[TMP4]]) #[[ATTR6]], !nosanitize [[META6]] +// CHECK-NEXT: unreachable, !nosanitize [[META6]] // CHECK: [[CONT]]: -// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META6]] // CHECK-NEXT: ret i32 [[TMP5]] // // TR-LABEL: define dso_local noundef i32 @overflow( // TR-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // TR-NEXT: [[ENTRY:.*:]] -// TR-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]]), !nosanitize [[META2]] -// TR-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] -// TR-NEXT: [[TMP2:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META2]] +// TR-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]]), !nosanitize [[META6]] +// TR-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META6]] +// TR-NEXT: [[TMP2:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META6]] // TR-NEXT: [[DOTDEMORGAN:%.*]] = and i1 [[TMP1]], [[TMP2]] -// TR-NEXT: br i1 [[DOTDEMORGAN]], label %[[TRAP:.*]], label %[[CONT:.*]], !prof [[PROF4]], !nosanitize [[META2]] +// TR-NEXT: br i1 [[DOTDEMORGAN]], label %[[TRAP:.*]], label %[[CONT:.*]], !prof [[PROF8]], !nosanitize [[META6]] // TR: [[TRAP]]: -// TR-NEXT: tail call void @llvm.ubsantrap(i8 0) #[[ATTR7]], !nosanitize [[META2]] -// TR-NEXT: unreachable, !nosanitize [[META2]] +// TR-NEXT: tail call void @llvm.ubsantrap(i8 0) #[[ATTR7]], !nosanitize [[META6]] +// TR-NEXT: unreachable, !nosanitize [[META6]] // TR: [[CONT]]: -// TR-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] +// TR-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META6]] // TR-NEXT: ret i32 [[TMP3]] // // REC-LABEL: define dso_local noundef i32 @overflow( // REC-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // REC-NEXT: [[ENTRY:.*:]] -// REC-NEXT: [[TMP0:%.*]] = tail call { i32, 
i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]]), !nosanitize [[META2]] -// REC-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] -// REC-NEXT: [[TMP2:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META2]] +// REC-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]]), !nosanitize [[META6]] +// REC-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META6]] +// REC-NEXT: [[TMP2:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 41), !nosanitize [[META6]] // REC-NEXT: [[DOTDEMORGAN:%.*]] = and i1 [[TMP1]], [[TMP2]] -// REC-NEXT: br i1 [[DOTDEMORGAN]], label %[[HANDLER_ADD_OVERFLOW:.*]], label %[[CONT:.*]], !prof [[PROF4]], !nosanitize [[META2]] +// REC-NEXT: br i1 [[DOTDEMORGAN]], label %[[HANDLER_ADD_OVERFLOW:.*]], label %[[CONT:.*]], !prof [[PROF8]], !nosanitize [[META6]] // REC: [[HANDLER_ADD_OVERFLOW]]: -// REC-NEXT: [[TMP3:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META2]] -// REC-NEXT: [[TMP4:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META2]] -// REC-NEXT: tail call void @__ubsan_handle_add_overflow(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[TMP3]], i64 [[TMP4]]) #[[ATTR6]], !nosanitize [[META2]] -// REC-NEXT: br label %[[CONT]], !nosanitize [[META2]] +// REC-NEXT: [[TMP3:%.*]] = zext i32 [[X]] to i64, !nosanitize [[META6]] +// REC-NEXT: [[TMP4:%.*]] = zext i32 [[Y]] to i64, !nosanitize [[META6]] +// REC-NEXT: tail call void @__ubsan_handle_add_overflow(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[TMP3]], i64 [[TMP4]]) #[[ATTR6]], !nosanitize [[META6]] +// REC-NEXT: br label %[[CONT]], !nosanitize [[META6]] // REC: [[CONT]]: -// REC-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] +// REC-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META6]] // REC-NEXT: ret i32 [[TMP5]] // int overflow(int x, int y) { @@ -200,16 +200,16 @@ void use(double*); // CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[TMP0]], 
[[IDXPROM]] // // 71 == SO_LocalBounds -// CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META2]] -// CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META6]] +// CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]], !nosanitize [[META6]] // CHECK-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]] // CHECK: [[BB4]]: // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]] // CHECK-NEXT: ret double [[TMP5]] // CHECK: [[TRAP]]: -// CHECK-NEXT: call void @__ubsan_handle_local_out_of_bounds_abort() #[[ATTR6]], !nosanitize [[META2]] -// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_local_out_of_bounds_abort() #[[ATTR6]], !nosanitize [[META6]] +// CHECK-NEXT: unreachable, !nosanitize [[META6]] // // TR-LABEL: define dso_local double @lbounds( // TR-SAME: i32 noundef [[B:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { @@ -219,16 +219,16 @@ void use(double*); // TR-NEXT: call void @use(ptr noundef nonnull [[VLA]]) #[[ATTR8:[0-9]+]] // TR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // TR-NEXT: [[TMP1:%.*]] = icmp ule i64 [[TMP0]], [[IDXPROM]] -// TR-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META2]] -// TR-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]], !nosanitize [[META2]] +// TR-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META6]] +// TR-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]], !nosanitize [[META6]] // TR-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]] // TR: [[BB4]]: // TR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]] // TR-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa 
[[DOUBLE_TBAA9:![0-9]+]] // TR-NEXT: ret double [[TMP5]] // TR: [[TRAP]]: -// TR-NEXT: call void @llvm.ubsantrap(i8 71) #[[ATTR7]], !nosanitize [[META2]] -// TR-NEXT: unreachable, !nosanitize [[META2]] +// TR-NEXT: call void @llvm.ubsantrap(i8 71) #[[ATTR7]], !nosanitize [[META6]] +// TR-NEXT: unreachable, !nosanitize [[META6]] // // REC-LABEL: define dso_local double @lbounds( // REC-SAME: i32 noundef [[B:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR0]] { @@ -238,16 +238,16 @@ void use(double*); // REC-NEXT: call void @use(ptr noundef nonnull [[VLA]]) #[[ATTR5:[0-9]+]] // REC-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // REC-NEXT: [[TMP1:%.*]] = icmp ule i64 [[TMP0]], [[IDXPROM]] -// REC-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META2]] -// REC-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]], !nosanitize [[META2]] +// REC-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META6]] +// REC-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]], !nosanitize [[META6]] // REC-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]] // REC: [[BB4]]: // REC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]] // REC-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]] // REC-NEXT: ret double [[TMP5]] // REC: [[TRAP]]: -// REC-NEXT: call void @__ubsan_handle_local_out_of_bounds() #[[ATTR6]], !nosanitize [[META2]] -// REC-NEXT: br label %[[BB4]], !nosanitize [[META2]] +// REC-NEXT: call void @__ubsan_handle_local_out_of_bounds() #[[ATTR6]], !nosanitize [[META6]] +// REC-NEXT: br label %[[BB4]], !nosanitize [[META6]] // double lbounds(int b, int i) { double a[b]; @@ -256,33 +256,33 @@ double lbounds(int b, int i) { } //. 
-// CHECK: [[META2]] = !{} -// CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} -// CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575} -// CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} -// CHECK: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} -// CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// CHECK: [[META8]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META6]] = !{} +// CHECK: [[PROF7]] = !{!"branch_weights", i32 1048575, i32 1} +// CHECK: [[PROF8]] = !{!"branch_weights", i32 1, i32 1048575} // CHECK: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// CHECK: [[META10]] = !{!"double", [[META7]], i64 0} +// CHECK: [[META10]] = !{!"double", [[META4]], i64 0} //. -// TR: [[META2]] = !{} -// TR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} -// TR: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575} -// TR: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} -// TR: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} -// TR: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// TR: [[META8]] = !{!"Simple C/C++ TBAA"} +// TR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// TR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// TR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// TR: [[META5]] = !{!"Simple C/C++ TBAA"} +// TR: [[META6]] = !{} +// TR: [[PROF7]] = !{!"branch_weights", i32 1048575, i32 1} +// TR: [[PROF8]] = !{!"branch_weights", i32 1, i32 1048575} // TR: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// TR: [[META10]] = !{!"double", [[META7]], i64 0} +// TR: [[META10]] = !{!"double", [[META4]], i64 0} //. 
-// REC: [[META2]] = !{} -// REC: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} -// REC: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575} -// REC: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} -// REC: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} -// REC: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// REC: [[META8]] = !{!"Simple C/C++ TBAA"} +// REC: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// REC: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// REC: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// REC: [[META5]] = !{!"Simple C/C++ TBAA"} +// REC: [[META6]] = !{} +// REC: [[PROF7]] = !{!"branch_weights", i32 1048575, i32 1} +// REC: [[PROF8]] = !{!"branch_weights", i32 1, i32 1048575} // REC: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// REC: [[META10]] = !{!"double", [[META7]], i64 0} +// REC: [[META10]] = !{!"double", [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index 847ce67fcc31b..29425a03134e4 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -32,7 +32,7 @@ DEFINE_STRUCT(bool) // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // @@ -40,7 +40,7 @@ DEFINE_STRUCT(bool) // CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { 
// CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v4i64( poison, <4 x i64> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -48,7 +48,7 @@ DEFINE_STRUCT(bool) // CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v8i64( poison, <8 x i64> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -61,7 +61,7 @@ svint64_t read_int64(struct struct_int64 *s) { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[X]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_int64( @@ -69,7 +69,7 @@ svint64_t read_int64(struct struct_int64 *s) { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64( [[X]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: store <4 x i64> 
[[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: define dso_local void @write_int64( @@ -77,7 +77,7 @@ svint64_t read_int64(struct struct_int64 *s) { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[X]], i64 0) // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 -// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: ret void // void write_int64(struct struct_int64 *s, svint64_t x) { @@ -92,7 +92,7 @@ void write_int64(struct struct_int64 *s, svint64_t x) { // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2f64.v2f64( poison, <2 x double> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // @@ -100,7 +100,7 @@ void write_int64(struct struct_int64 *s, svint64_t x) { // CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call 
@llvm.vector.insert.nxv2f64.v4f64( poison, <4 x double> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -108,7 +108,7 @@ void write_int64(struct struct_int64 *s, svint64_t x) { // CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2f64.v8f64( poison, <8 x double> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -121,7 +121,7 @@ svfloat64_t read_float64(struct struct_float64 *s) { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64( [[X]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_float64( @@ -129,7 +129,7 @@ svfloat64_t read_float64(struct struct_float64 *s) { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv2f64( [[X]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: define dso_local void @write_float64( @@ -137,7 +137,7 @@ svfloat64_t read_float64(struct struct_float64 
*s) { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x double> @llvm.vector.extract.v8f64.nxv2f64( [[X]], i64 0) // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 -// CHECK-512-NEXT: store <8 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: store <8 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: ret void // void write_float64(struct struct_float64 *s, svfloat64_t x) { @@ -152,7 +152,7 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) { // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // @@ -160,7 +160,7 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) { // CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v16bf16( poison, <16 x bfloat> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -168,7 +168,7 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) { // CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v32bf16( poison, <32 x bfloat> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -181,7 +181,7 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16( [[X]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 -// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_bfloat16( @@ -189,7 +189,7 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16( [[X]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 -// CHECK-256-NEXT: store <16 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: store <16 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: define dso_local void @write_bfloat16( @@ -197,7 +197,7 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16( [[X]], i64 0) // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 
-// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: ret void // void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { @@ -212,7 +212,7 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { // CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 2 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] @@ -221,7 +221,7 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { // CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-256-NEXT: [[ENTRY:.*:]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v4i8( poison, <4 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] @@ -230,7 +230,7 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { // CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-512-NEXT: 
[[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-512-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-512-NEXT: ret [[TMP1]] @@ -245,7 +245,7 @@ svbool_t read_bool(struct struct_bool *s) { // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 2 -// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: define dso_local void @write_bool( @@ -254,7 +254,7 @@ svbool_t read_bool(struct struct_bool *s) { // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( [[TMP0]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4 -// CHECK-256-NEXT: store <4 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-256-NEXT: store <4 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: define dso_local void @write_bool( @@ -263,22 +263,22 @@ svbool_t read_bool(struct struct_bool *s) { // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 -// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa 
[[CHAR_TBAA6]] // CHECK-512-NEXT: ret void // void write_bool(struct struct_bool *s, svbool_t x) { s->y[0] = x; } //. -// CHECK-128: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-128: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK-128: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-128: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. -// CHECK-256: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-256: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK-256: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK-256: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-256: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-256: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. -// CHECK-512: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-512: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK-512: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK-512: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-512: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-512: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index bdaebf7ec1da7..7e3223f671f27 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -79,7 +79,7 @@ svint64_t lax_cast(fixed_int32_t type) { // CHECK-LABEL: define dso_local @to_svint32_t__from_gnu_int32_t( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -91,7 +91,7 @@ svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i32>) align 16 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], [[TYPE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE]], i64 0) -// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { @@ -101,7 +101,7 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { // CHECK-LABEL: define dso_local @to_fixed_int32_t__from_gnu_int32_t( // CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -113,14 +113,14 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) { // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i32>) align 16 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE_COERCE]], i64 0) -// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) { return type; } //. -// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index b604a06d76a30..ae011db633b6a 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -24,14 +24,14 @@ fixed_bool_t global_bool; // CHECK-128-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[V]], i64 0) -// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: define void @write_global_i64( // CHECK-512-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[V]], i64 0) -// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-512-NEXT: ret void // void write_global_i64(svint64_t v) { global_i64 = v; } @@ -40,14 +40,14 @@ void write_global_i64(svint64_t v) { global_i64 = v; } // CHECK-128-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16( [[V]], i64 0) -// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: define void @write_global_bf16( // CHECK-512-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16( [[V]], i64 0) -// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: ret void // void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } @@ -57,7 +57,7 @@ void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } // CHECK-128-NEXT: [[ENTRY:.*:]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: define void @write_global_bool( @@ -65,7 +65,7 @@ void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } // CHECK-512-NEXT: [[ENTRY:.*:]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: ret void // void write_global_bool(svbool_t v) { global_bool = v; } @@ -77,14 +77,14 @@ void write_global_bool(svbool_t v) { global_bool = v; } // CHECK-128-LABEL: define @read_global_i64( // CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-128-NEXT: [[ENTRY:.*:]] -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: 
[[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // // CHECK-512-LABEL: define @read_global_i64( // CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-512-NEXT: [[ENTRY:.*:]] -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v8i64( poison, <8 x i64> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -93,14 +93,14 @@ svint64_t read_global_i64() { return global_i64; } // CHECK-128-LABEL: define @read_global_bf16( // CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-128-NEXT: [[ENTRY:.*:]] -// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // // CHECK-512-LABEL: define @read_global_bf16( // CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-512-NEXT: [[ENTRY:.*:]] -// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v32bf16( poison, <32 x bfloat> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -109,7 +109,7 @@ svbfloat16_t read_global_bf16() { return global_bf16; } // CHECK-128-LABEL: define @read_global_bool( // CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-128-NEXT: [[ENTRY:.*:]] -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x 
i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] @@ -117,18 +117,18 @@ svbfloat16_t read_global_bf16() { return global_bf16; } // CHECK-512-LABEL: define @read_global_bool( // CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2]] { // CHECK-512-NEXT: [[ENTRY:.*:]] -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA6]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-512-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-512-NEXT: ret [[TMP1]] // svbool_t read_global_bool() { return global_bool; } //. -// CHECK-128: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-128: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK-128: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-128: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-128: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. -// CHECK-512: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK-512: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK-512: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK-512: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-512: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-512: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/attr-counted-by-for-pointers.c b/clang/test/CodeGen/attr-counted-by-for-pointers.c index f7b737d5c5039..c5729fd017d8c 100644 --- a/clang/test/CodeGen/attr-counted-by-for-pointers.c +++ b/clang/test/CodeGen/attr-counted-by-for-pointers.c @@ -33,47 +33,47 @@ struct annotated_ptr { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7:![0-9]+]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3:[0-9]+]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITH-ATTR-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA8:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA14:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// 
SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test1(struct annotated_ptr *p, int index, struct foo *value) { @@ -86,47 +86,47 @@ void test1(struct annotated_ptr *p, int index, struct foo *value) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 
[[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA8]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13]] +// 
SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA14]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { // NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa 
[[_ZTS3FOOPTR_TBAA11]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test2(struct annotated_ptr *p, int index, struct foo *value) { @@ -139,47 +139,47 @@ void test2(struct annotated_ptr *p, int index, struct foo *value) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT10:.*]], !prof 
[[PROF15:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT10:.*]], !prof [[PROF16:![0-9]+]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA8]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13]] +// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA14]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { // NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA6]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA12]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test3(struct annotated_ptr *p, int index, struct foo *value) { @@ -264,12 +264,12 @@ size_t test5(struct annotated_ptr *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF16]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], 
!nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT8]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]] @@ -312,12 +312,12 @@ size_t test6(struct annotated_ptr *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META6]] 
+// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // @@ -384,12 +384,12 @@ size_t test8(struct annotated_sized_ptr *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF16]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT8]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]] @@ -430,12 +430,12 @@ size_t test9(struct annotated_sized_ptr *p, int index) { // 
SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF16]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT8]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[INDEX_SIZE:%.*]] = shl nuw nsw i64 [[IDXPROM]], 2 @@ -549,54 +549,54 @@ size_t test12(struct pr151236_struct *p) { return __bdos(p->a) + __bdos(((int *)p->a)); } //. 
-// SANITIZE-WITH-ATTR: [[META2]] = !{} -// SANITIZE-WITH-ATTR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} -// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META9:![0-9]+]], i64 8} -// SANITIZE-WITH-ATTR: [[META5]] = !{!"annotated_ptr", [[META6:![0-9]+]], i64 0, [[META9]], i64 8, [[META12:![0-9]+]], i64 16} -// SANITIZE-WITH-ATTR: [[META6]] = !{!"long", [[META7:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META8]] = !{!"Simple C/C++ TBAA"} -// SANITIZE-WITH-ATTR: [[META9]] = !{!"p2 _ZTS3foo", [[META10:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META10]] = !{!"any p2 pointer", [[META11:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META11]] = !{!"any pointer", [[META7]], i64 0} -// SANITIZE-WITH-ATTR: [[META12]] = !{!"int", [[META7]], i64 0} -// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} -// SANITIZE-WITH-ATTR: [[META14]] = !{!"p1 _ZTS3foo", [[META11]], i64 0} -// SANITIZE-WITH-ATTR: [[PROF15]] = !{!"branch_weights", i32 1, i32 1048575} +// SANITIZE-WITH-ATTR: [[META3:![0-9]+]] = !{!"int", [[META4:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITH-ATTR: [[META6]] = !{} +// SANITIZE-WITH-ATTR: [[PROF7]] = !{!"branch_weights", i32 1048575, i32 1} +// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META11:![0-9]+]], i64 8} +// SANITIZE-WITH-ATTR: [[META9]] = !{!"annotated_ptr", [[META10:![0-9]+]], i64 0, [[META11]], i64 8, [[META3]], i64 16} +// SANITIZE-WITH-ATTR: [[META10]] = !{!"long", [[META4]], i64 0} +// SANITIZE-WITH-ATTR: [[META11]] = !{!"p2 _ZTS3foo", [[META12:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META12]] = !{!"any p2 pointer", [[META13:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META13]] = !{!"any pointer", [[META4]], i64 0} +// SANITIZE-WITH-ATTR: 
[[_ZTS3FOOPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// SANITIZE-WITH-ATTR: [[META15]] = !{!"p1 _ZTS3foo", [[META13]], i64 0} +// SANITIZE-WITH-ATTR: [[PROF16]] = !{!"branch_weights", i32 1, i32 1048575} //. -// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} -// NO-SANITIZE-WITH-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16} -// NO-SANITIZE-WITH-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"} -// NO-SANITIZE-WITH-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[META10]] = !{!"int", [[META5]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// NO-SANITIZE-WITH-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META3:![0-9]+]] = !{!"int", [[META4:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META9:![0-9]+]], i64 8} +// NO-SANITIZE-WITH-ATTR: [[META7]] = !{!"annotated_ptr", [[META8:![0-9]+]], i64 0, [[META9]], i64 8, [[META3]], i64 16} +// NO-SANITIZE-WITH-ATTR: [[META8]] = !{!"long", [[META4]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META9]] = !{!"p2 _ZTS3foo", [[META10:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META10]] = !{!"any p2 pointer", [[META11:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA12]] = 
!{[[META13:![0-9]+]], [[META13]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META13]] = !{!"p1 _ZTS3foo", [[META11]], i64 0} //. -// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} -// SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16} -// SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"} -// SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"int", [[META5]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META3:![0-9]+]] = !{!"int", [[META4:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META9:![0-9]+]], i64 8} +// SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"annotated_ptr", [[META8:![0-9]+]], i64 0, [[META9]], i64 8, [[META3]], i64 16} +// SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"long", [[META4]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"p2 _ZTS3foo", [[META10:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"any p2 pointer", [[META11:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META13]] = !{!"p1 _ZTS3foo", [[META11]], i64 0} 
//. -// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} -// NO-SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16} -// NO-SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"} -// NO-SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"int", [[META5]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// NO-SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META3:![0-9]+]] = !{!"int", [[META4:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META9:![0-9]+]], i64 8} +// NO-SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"annotated_ptr", [[META8:![0-9]+]], i64 0, [[META9]], i64 8, [[META3]], i64 16} +// NO-SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"long", [[META4]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"p2 _ZTS3foo", [[META10:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"any p2 pointer", [[META11:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META13]] = !{!"p1 _ZTS3foo", [[META11]], i64 0} //. 
diff --git a/clang/test/CodeGen/attr-counted-by-pr110385.c b/clang/test/CodeGen/attr-counted-by-pr110385.c index 32ee1c8eb5dbe..e854ed48e2c58 100644 --- a/clang/test/CodeGen/attr-counted-by-pr110385.c +++ b/clang/test/CodeGen/attr-counted-by-pr110385.c @@ -29,7 +29,7 @@ void init(void * __attribute__((pass_dynamic_object_size(0)))); // CHECK-SAME: ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[GROWABLE:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[_ZTS8VARIABLEPTR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[_ZTS8VARIABLEPTR_TBAA6:![0-9]+]] // CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12 // CHECK-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 // CHECK-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -61,11 +61,11 @@ void test2(struct bucket2 *foo) { init(foo->growable.array); } //. 
-// CHECK: [[_ZTS8VARIABLEPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} -// CHECK: [[META3]] = !{!"bucket", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META4]], i64 16} -// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[META7]] = !{!"p1 _ZTS8variable", [[META8:![0-9]+]], i64 0} -// CHECK: [[META8]] = !{!"any pointer", [[META5]], i64 0} +// CHECK: [[META3:![0-9]+]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[_ZTS8VARIABLEPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META8:![0-9]+]], i64 8} +// CHECK: [[META7]] = !{!"bucket", [[META3]], i64 0, [[META8]], i64 8, [[META3]], i64 16} +// CHECK: [[META8]] = !{!"p1 _ZTS8variable", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any pointer", [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index 9675fe21be366..86c59fb2b14ea 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -64,16 +64,16 @@ struct anon_struct { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize 
[[META6:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7:![0-9]+]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8:[0-9]+]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8:[0-9]+]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( @@ -112,18 +112,18 @@ void test1(struct annotated *p, int index, int val) { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// 
SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT6]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP2]], 2 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( @@ -235,16 +235,16 @@ size_t test2_bdos_cast(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] 
= icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( @@ -331,63 +331,63 @@ size_t test3_bdos_cast(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 
[[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 2 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 3) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 3) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[DOTCOUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT12:.*]], label %[[HANDLER_OUT_OF_BOUNDS8:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT12:.*]], label %[[HANDLER_OUT_OF_BOUNDS8:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS8]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// 
SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT12]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 244 // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = and i32 [[RESULT]], 252 // SANITIZE-WITH-ATTR-NEXT: [[CONV2:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 0 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT81:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 3 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT81]], label %[[HANDLER_OUT_OF_BOUNDS18:.*]], label %[[CONT19:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT81]], label %[[HANDLER_OUT_OF_BOUNDS18:.*]], label %[[CONT19:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS18]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 4) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 4) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT19]]: // SANITIZE-WITH-ATTR-NEXT: [[ADD:%.*]] = add nsw i32 [[INDEX]], 1 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM31:%.*]] = sext i32 [[ADD]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = icmp ult 
i64 [[IDXPROM31]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP5]], label %[[CONT38:.*]], label %[[HANDLER_OUT_OF_BOUNDS34:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = icmp ult i64 [[IDXPROM31]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP5]], label %[[CONT38:.*]], label %[[HANDLER_OUT_OF_BOUNDS34:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS34]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM31]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM31]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT38]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 3 // SANITIZE-WITH-ATTR-NEXT: [[RESULT25:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 240 // SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = and i32 [[RESULT25]], 252 // SANITIZE-WITH-ATTR-NEXT: [[CONV27:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 0 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM31]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV27]], ptr [[ARRAYIDX36]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV27]], ptr [[ARRAYIDX36]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM42:%.*]] = sext i32 [[FAM_IDX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD44:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD44]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM42]], 
[[TMP8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS45:.*]], label %[[CONT46:.*]], !prof [[PROF8]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD44]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM42]], [[TMP8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS45:.*]], label %[[CONT46:.*]], !prof [[PROF8]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS45]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM42]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM42]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT46]]: // SANITIZE-WITH-ATTR-NEXT: [[ADD59:%.*]] = add nsw i32 [[INDEX]], 2 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM60:%.*]] = sext i32 [[ADD59]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP9:%.*]] = icmp ult i64 [[IDXPROM60]], [[TMP8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP9]], label %[[CONT67:.*]], label %[[HANDLER_OUT_OF_BOUNDS63:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP9:%.*]] = icmp ult i64 [[IDXPROM60]], [[TMP8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP9]], label %[[CONT67:.*]], label %[[HANDLER_OUT_OF_BOUNDS63:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS63]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM60]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize 
[[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM60]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT67]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM60]] // SANITIZE-WITH-ATTR-NEXT: [[COUNT50:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD44]] to i64 @@ -396,7 +396,7 @@ size_t test3_bdos_cast(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[DOTTR:%.*]] = trunc nuw nsw i64 [[TMP11]] to i32 // SANITIZE-WITH-ATTR-NEXT: [[CONV54:%.*]] = shl i32 [[DOTTR]], 2 // SANITIZE-WITH-ATTR-NEXT: [[CONV55:%.*]] = and i32 [[CONV54]], 252 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV55]], ptr [[ARRAYIDX65]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV55]], ptr [[ARRAYIDX65]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4( @@ -475,12 +475,12 @@ void test4(struct annotated *p, int index, int fam_idx) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT1:.*]], !prof [[PROF8]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// 
SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT1:.*]], !prof [[PROF8]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]] @@ -613,15 +613,15 @@ size_t test4_bdos_cast2(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// 
SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5( @@ -685,18 +685,18 @@ size_t test5_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], 
!nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT6]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6( @@ -773,12 +773,12 @@ size_t test6_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call 
void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT7]]: // SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[INTS]], i64 [[IDXPROM]] @@ -846,12 +846,12 @@ size_t test7_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT14]]: // SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[INTS]], i64 [[IDXPROM]] @@ -927,12 +927,12 @@ size_t test8_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT7]]: // 
SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[BYTES]], i64 [[IDXPROM]] @@ -1000,12 +1000,12 @@ size_t test9_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT14]]: // SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[BYTES]], i64 [[IDXPROM]] @@ -1087,12 +1087,12 @@ size_t 
test10_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT6]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]] @@ -1100,7 +1100,7 @@ size_t test10_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[COUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 8 // 
SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[TMP2]], i32 [[RESULT]], i32 0 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test11( @@ -1201,23 +1201,23 @@ int test12_a, test12_b; // SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB22:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB22:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[BAZ]], i64 [[TMP1]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa 
[[INT_TBAA2]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4 // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS4:.*]], label %[[HANDLER_TYPE_MISMATCH6:.*]], !prof [[PROF8]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS4:.*]], label %[[HANDLER_TYPE_MISMATCH6:.*]], !prof [[PROF8]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS4]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_TYPE_MISMATCH6]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { @@ -1302,12 +1302,12 @@ struct test13_bar { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load 
ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA11:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT5]]: // SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[REVMAP]], i64 [[INDEX]] @@ -1364,11 +1364,11 @@ struct test14_foo { // SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0 -// SANITIZE-WITH-ATTR-NEXT: br i1 
[[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: ret i32 undef // @@ -1418,11 +1418,11 @@ int test14(int idx) { // SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] 
// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: ret i32 undef // @@ -1469,10 +1469,10 @@ int test15(int idx) { // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 680 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 1 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 2) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 2) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // @@ -1607,13 +1607,13 @@ struct tests_foo { // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 40 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT4:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT4:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr 
nonnull @[[GLOB33:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT4]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP1]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24( @@ -1648,13 +1648,13 @@ int test24(int c, struct tests_foo *var) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA17:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT5]]: // SANITIZE-WITH-ATTR-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test25( @@ -1698,16 +1698,16 @@ struct test26_foo { // SANITIZE-WITH-ATTR-NEXT: [[S:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[S]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB35:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB35:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT5]]: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 
8 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test26( @@ -1769,12 +1769,12 @@ struct test27_foo { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // 
SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ENTRIES]], i64 [[IDXPROM]] @@ -1835,16 +1835,16 @@ struct test28_foo { // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT17:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT17:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT17]]: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP5]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test28( @@ -1899,28 +1899,28 @@ struct annotated_struct_array { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 10 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB41:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB41:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ANN]], i64 [[TMP1]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA23:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM27:%.*]] = sext i32 [[IDX2]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 
[[IDXPROM27]], [[TMP3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT32:.*]], label %[[HANDLER_OUT_OF_BOUNDS28:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM27]], [[TMP3]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT32:.*]], label %[[HANDLER_OUT_OF_BOUNDS28:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS28]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB42:[0-9]+]], i64 [[IDXPROM27]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB42:[0-9]+]], i64 [[IDXPROM27]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT32]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM27]] // SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP5]], 2 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX30]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX30]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test29( @@ -1987,9 +1987,9 @@ struct test30_struct { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] { // 
SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB44:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB44:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { @@ -2062,21 +2062,21 @@ struct annotated_with_array { // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX2]], 43 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB46:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull 
@[[GLOB46:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX1]] to i64 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp ult i64 [[IDXPROM4]], [[TMP2]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label %[[CONT9:.*]], label %[[HANDLER_OUT_OF_BOUNDS5:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp ult i64 [[IDXPROM4]], [[TMP2]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label %[[CONT9:.*]], label %[[HANDLER_OUT_OF_BOUNDS5:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS5]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB48:[0-9]+]], i64 [[IDXPROM4]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB48:[0-9]+]], i64 [[IDXPROM4]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT9]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i64, ptr [[ARRAY]], i64 [[IDXPROM4]] @@ -2143,11 +2143,11 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) { // 
SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 43 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB49:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB49:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -2280,18 +2280,18 @@ struct multi_subscripts { // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 42 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize 
[[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB51:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB51:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS2:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS2:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS2]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[IDX2]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB52:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB52:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // @@ -2333,16 +2333,16 @@ size_t test34(struct multi_subscripts *ptr, int idx1, int idx2) { // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: 
[[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB53:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB53:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META6]] +// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META6]] // SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test35( @@ -2470,19 +2470,19 @@ size_t test37(struct annotated *ptr) { return __builtin_dynamic_object_size((1, 2, (4, 5, (7, 8, 9, (10, ptr->array)))), 1); } //. 
-// SANITIZE-WITH-ATTR: [[META2]] = !{} -// SANITIZE-WITH-ATTR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} -// SANITIZE-WITH-ATTR: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// SANITIZE-WITH-ATTR: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META7]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITH-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// SANITIZE-WITH-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITH-ATTR: [[META6]] = !{} +// SANITIZE-WITH-ATTR: [[PROF7]] = !{!"branch_weights", i32 1048575, i32 1} // SANITIZE-WITH-ATTR: [[PROF8]] = !{!"branch_weights", i32 1, i32 1048575} -// SANITIZE-WITH-ATTR: [[CHAR_TBAA9]] = !{[[META6]], [[META6]], i64 0} +// SANITIZE-WITH-ATTR: [[CHAR_TBAA9]] = !{[[META4]], [[META4]], i64 0} // SANITIZE-WITH-ATTR: [[TBAA_STRUCT10]] = !{i64 0, i64 24, [[CHAR_TBAA9]]} // SANITIZE-WITH-ATTR: [[_ZTS10TEST13_BARPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], i64 0} // SANITIZE-WITH-ATTR: [[META12]] = !{!"test13_foo", [[META13]], i64 0} // SANITIZE-WITH-ATTR: [[META13]] = !{!"p1 _ZTS10test13_bar", [[META14:![0-9]+]], i64 0} -// SANITIZE-WITH-ATTR: [[META14]] = !{!"any pointer", [[META6]], i64 0} +// SANITIZE-WITH-ATTR: [[META14]] = !{!"any pointer", [[META4]], i64 0} // SANITIZE-WITH-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} // SANITIZE-WITH-ATTR: [[META16]] = !{!"p1 _ZTS10test13_foo", [[META14]], i64 0} // SANITIZE-WITH-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} @@ -2494,7 +2494,7 @@ size_t test37(struct annotated *ptr) { // SANITIZE-WITH-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} // SANITIZE-WITH-ATTR: [[META24]] = 
!{!"p1 _ZTS9annotated", [[META14]], i64 0} // SANITIZE-WITH-ATTR: [[LONG_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0} -// SANITIZE-WITH-ATTR: [[META26]] = !{!"long", [[META6]], i64 0} +// SANITIZE-WITH-ATTR: [[META26]] = !{!"long", [[META4]], i64 0} //. // NO-SANITIZE-WITH-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // NO-SANITIZE-WITH-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} diff --git a/clang/test/CodeGen/builtin-maximumnum-minimumnum.c b/clang/test/CodeGen/builtin-maximumnum-minimumnum.c index ea9d2e7a4ed38..aa18d9ca217f7 100644 --- a/clang/test/CodeGen/builtin-maximumnum-minimumnum.c +++ b/clang/test/CodeGen/builtin-maximumnum-minimumnum.c @@ -12,10 +12,10 @@ typedef long double ldouble2 __attribute__((ext_vector_type(2))); // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MINIMUMNUM]] // @@ -27,10 +27,10 @@ half8 pfmin16(half8 a, half8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, 
align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MINIMUMNUM]] // @@ -42,10 +42,10 @@ bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MINIMUMNUM]] // @@ -57,10 +57,10 @@ 
float4 pfmin32(float4 a, float4 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MINIMUMNUM]] // @@ -72,12 +72,12 @@ double2 pfmin64(double2 a, double2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa 
[[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x x86_fp80> @llvm.minimumnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINIMUMNUM]] // @@ -90,10 +90,10 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MAXIMUMNUM]] // @@ -105,10 +105,10 @@ half8 pfmax16(half8 a, half8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXIMUMNUM]] // @@ -120,10 +120,10 @@ bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUMNUM]] // @@ -135,10 +135,10 @@ float4 pfmax32(float4 a, float4 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = 
alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MAXIMUMNUM]] // @@ -151,12 +151,12 @@ double2 pfmax64(double2 a, double2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], 
align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x x86_fp80> @llvm.minimumnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINIMUMNUM]] // @@ -165,7 +165,7 @@ ldouble2 pfmax80(ldouble2 a, ldouble2 b) { } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c index 2455f3b616ce7..d05d43c23bf27 100644 --- a/clang/test/CodeGen/builtin-maxnum-minnum.c +++ b/clang/test/CodeGen/builtin-maxnum-minnum.c @@ -12,10 +12,10 @@ typedef long double ldouble2 __attribute__((ext_vector_type(2))); // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x 
half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]] // @@ -27,10 +27,10 @@ half8 pfmin16(half8 a, half8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]] // @@ -42,10 +42,10 @@ bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]] // @@ -57,10 +57,10 @@ float4 pfmin32(float4 a, float4 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]] // @@ -72,12 +72,12 @@ double2 pfmin64(double2 a, double2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] // @@ -90,10 +90,10 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = 
load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]] // @@ -105,10 +105,10 @@ half8 pfmax16(half8 a, half8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]] // @@ -120,10 +120,10 @@ bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa 
[[CHAR_TBAA6]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]] // @@ -135,10 +135,10 @@ float4 pfmax32(float4 a, float4 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]] // @@ -151,12 +151,12 @@ double2 pfmax64(double2 a, double2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] -// 
CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] // @@ -165,7 +165,7 @@ ldouble2 pfmax80(ldouble2 a, ldouble2 b) { } //. -// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/errno-tbaa.c b/clang/test/CodeGen/errno-tbaa.c new file mode 100644 index 0000000000000..4ca62a37babf2 --- /dev/null +++ b/clang/test/CodeGen/errno-tbaa.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -O0 -o - %s | FileCheck %s --check-prefix=NOTBAA +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -O1 -o - %s | FileCheck %s --check-prefix=ERRNO-TBAA +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -x c++ -emit-llvm -O1 -o - %s | FileCheck %s --check-prefix=ERRNO-TBAA +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -O1 -relaxed-aliasing -o - %s | FileCheck %s --check-prefix=NOSTRICT + +// Ensure !llvm.errno.tbaa metadata is emitted upon integer accesses, if TBAA is available. + +int int_access(int *ptr) { return ptr ? *ptr : 0; } + +// NOTBAA-NOT: !llvm.errno.tbaa +// ERRNO-TBAA: !llvm.errno.tbaa +// NOSTRICT-NOT: !llvm.errno.tbaa diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index 8a631c471c329..07e760e60b57b 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -162,17 +162,17 @@ int4 check_isfpclass_nan_strict_v4f32(float4 x) { // CHECK-LABEL: define dso_local void @check_isfpclass_nan_v4f64( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 16 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x double> [[X]], zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store 
<4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // long4 check_isfpclass_nan_v4f64(double4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } //. -// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c index 20a31003fe915..e5ab6e00c4e54 100644 --- a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c +++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c @@ -54,13 +54,13 @@ long double powl(long double a, long double b); // // CHECK-MINGW32-LABEL: define dso_local void @test_powl( // CHECK-MINGW32-SAME: ptr dead_on_unwind noalias writable writeonly sret(x86_fp80) align 16 captures(none) initializes((0, 10)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[LONG_DOUBLE_TBAA6:![0-9]+]] -// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[LONG_DOUBLE_TBAA10:![0-9]+]] +// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa 
[[LONG_DOUBLE_TBAA10]] +// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] // CHECK-MINGW32: call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3:[0-9]+]] -// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] +// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] // long double test_powl(long double a, long double b) { return powl(a, b); @@ -137,7 +137,7 @@ long double test_powl(long double a, long double b) { // CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 // CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 // CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] -// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] // CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16 // CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load x86_fp80, ptr [[CLD_IMAGP]], align 16 // CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 @@ -190,8 +190,8 @@ int ilogbl(long double a); // // CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb( // CHECK-MINGW32-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa 
[[LONG_DOUBLE_TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA10]] // CHECK-MINGW32: [[CALL:%.*]] = call i32 @ilogbl(ptr dead_on_return noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] // int test_ilogb(long double a) { @@ -243,8 +243,8 @@ int test_ilogb(long double a) { // CHECK-SPIR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-SPIR: [[META5]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-MINGW32: [[LONG_DOUBLE_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK-MINGW32: [[META7]] = !{!"long double", [[META8:![0-9]+]], i64 0} -// CHECK-MINGW32: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META8:![0-9]+]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} // CHECK-MINGW32: [[META9]] = !{!"Simple C/C++ TBAA"} +// CHECK-MINGW32: [[LONG_DOUBLE_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK-MINGW32: [[META11]] = !{!"long double", [[META8]], i64 0} //. 
diff --git a/clang/test/CodeGen/math-libcalls-tbaa.c b/clang/test/CodeGen/math-libcalls-tbaa.c index 53ca7963b27c1..ec234bca66371 100644 --- a/clang/test/CodeGen/math-libcalls-tbaa.c +++ b/clang/test/CodeGen/math-libcalls-tbaa.c @@ -17,8 +17,8 @@ float crealf(float _Complex); // NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2:![0-9]+]] -// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[INT_TBAA6:![0-9]+]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]] // NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] // NONEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] // @@ -26,8 +26,8 @@ float crealf(float _Complex); // NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 -// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] -// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA6:![0-9]+]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA6:![0-9]+]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] // NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float 
[[TMP0]], [[CALL]] // NEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] // @@ -41,8 +41,8 @@ float test_expf (float num[]) { // NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] { // NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] -// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[INT_TBAA6]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA6]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[INT_TBAA2]] // NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] // NONEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] // @@ -50,8 +50,8 @@ float test_expf (float num[]) { // NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] { // NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 -// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA6]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA6]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA2]] // NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] // NEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] // @@ -92,7 +92,7 @@ double test_fabs (double num[]) { // NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 // 
NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8]] -// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[INT_TBAA6]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[INT_TBAA2]] // NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] // NONEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] // @@ -101,7 +101,7 @@ double test_fabs (double num[]) { // NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 // NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]] -// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA6]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA2]] // NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] // NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] // @@ -156,12 +156,12 @@ double test_frexp (double num[]) { // NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]] // NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]] // NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA6]] // NONEWSTRUCTPATHTBAA-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]] -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr 
[[SIN]], align 4, !tbaa [[FLOAT_TBAA2]] -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[FLOAT_TBAA6]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[FLOAT_TBAA6]] // NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]] -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA6]] // NONEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]] // NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]] // NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]] @@ -175,12 +175,12 @@ double test_frexp (double num[]) { // NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]] // NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]] // NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 -// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA6]] // NEWSTRUCTPATHTBAA-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]] -// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA2]] -// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA6]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[TBAA6]] // NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], 
[[TMP2]] -// NEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA6]] // NEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]] // NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]] // NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]] @@ -198,12 +198,12 @@ float test_sincos (float num[]) { // NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] { // NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA6]] // NONEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0 // NONEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1 // NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]] // NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0 -// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA6]] // NONEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]] // NONEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]] // @@ -211,12 +211,12 @@ float test_sincos (float num[]) { // NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] { // NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] // 
NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 -// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA6]] // NEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0 // NEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1 // NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]] // NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0 -// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA6]] // NEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]] // NEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]] // @@ -227,21 +227,21 @@ float test_cacoshf (float num[]) { } //. 
-// NONEWSTRUCTPATHTBAA: [[FLOAT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// NONEWSTRUCTPATHTBAA: [[META3]] = !{!"float", [[META4:![0-9]+]], i64 0} +// NONEWSTRUCTPATHTBAA: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// NONEWSTRUCTPATHTBAA: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // NONEWSTRUCTPATHTBAA: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // NONEWSTRUCTPATHTBAA: [[META5]] = !{!"Simple C/C++ TBAA"} -// NONEWSTRUCTPATHTBAA: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// NONEWSTRUCTPATHTBAA: [[META7]] = !{!"int", [[META4]], i64 0} +// NONEWSTRUCTPATHTBAA: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// NONEWSTRUCTPATHTBAA: [[META7]] = !{!"float", [[META4]], i64 0} // NONEWSTRUCTPATHTBAA: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} // NONEWSTRUCTPATHTBAA: [[META9]] = !{!"double", [[META4]], i64 0} //. // NEWSTRUCTPATHTBAA: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0, i64 4} -// NEWSTRUCTPATHTBAA: [[META3]] = !{[[META4:![0-9]+]], i64 4, !"float"} +// NEWSTRUCTPATHTBAA: [[META3]] = !{[[META4:![0-9]+]], i64 4, !"int"} // NEWSTRUCTPATHTBAA: [[META4]] = !{[[META5:![0-9]+]], i64 1, !"omnipotent char"} // NEWSTRUCTPATHTBAA: [[META5]] = !{!"Simple C/C++ TBAA"} // NEWSTRUCTPATHTBAA: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0, i64 4} -// NEWSTRUCTPATHTBAA: [[META7]] = !{[[META4]], i64 4, !"int"} +// NEWSTRUCTPATHTBAA: [[META7]] = !{[[META4]], i64 4, !"float"} // NEWSTRUCTPATHTBAA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0, i64 8} // NEWSTRUCTPATHTBAA: [[META9]] = !{[[META4]], i64 8, !"double"} //. 
diff --git a/clang/test/CodeGen/pointer-arithmetic-align.c b/clang/test/CodeGen/pointer-arithmetic-align.c index 745ab84635c1b..73b1c1f24bfc9 100644 --- a/clang/test/CodeGen/pointer-arithmetic-align.c +++ b/clang/test/CodeGen/pointer-arithmetic-align.c @@ -13,7 +13,7 @@ struct a { // CHECK-SAME: ptr noundef writeonly captures(none) initializes((8, 9)) [[CTX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[BLOCK:%.*]] = getelementptr inbounds nuw i8, ptr [[CTX]], i64 8 -// CHECK-NEXT: store i8 0, ptr [[BLOCK]], align 8, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store i8 0, ptr [[BLOCK]], align 8, !tbaa [[TBAA6:![0-9]+]] // CHECK-NEXT: ret void // void ptradd_0(struct a *ctx) { @@ -24,7 +24,7 @@ void ptradd_0(struct a *ctx) { // CHECK-SAME: ptr noundef writeonly captures(none) initializes((12, 13)) [[CTX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[CTX]], i64 12 -// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA6]] // CHECK-NEXT: ret void // void ptradd_4(struct a *ctx) { @@ -35,7 +35,7 @@ void ptradd_4(struct a *ctx) { // CHECK-SAME: ptr noundef writeonly captures(none) initializes((16, 17)) [[CTX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[CTX]], i64 16 -// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 8, !tbaa [[TBAA6]] // CHECK-NEXT: ret void // void ptradd_8(struct a *ctx) { @@ -46,7 +46,7 @@ void ptradd_8(struct a *ctx) { // CHECK-SAME: ptr noundef writeonly captures(none) initializes((16, 17)) [[CTX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[CTX]], i64 16 -// CHECK-NEXT: store i8 0, ptr 
[[ADD_PTR]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 8, !tbaa [[TBAA6]] // CHECK-NEXT: ret void // void ptradd_8_commuted(struct a *ctx) { @@ -57,7 +57,7 @@ void ptradd_8_commuted(struct a *ctx) { // CHECK-SAME: ptr noundef writeonly captures(none) initializes((8, 9)) [[CTX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[CTX]], i64 8 -// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA6]] // CHECK-NEXT: ret void // void ptrsub_4(struct a *ctx) { @@ -70,14 +70,14 @@ void ptrsub_4(struct a *ctx) { // CHECK-NEXT: [[BLOCK:%.*]] = getelementptr inbounds nuw i8, ptr [[CTX]], i64 8 // CHECK-NEXT: [[IDX_EXT:%.*]] = zext i8 [[IDX]] to i64 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[BLOCK]], i64 [[IDX_EXT]] -// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: store i8 0, ptr [[ADD_PTR]], align 1, !tbaa [[TBAA6]] // CHECK-NEXT: ret void // void neg_ptradd_var_index(struct a *ctx, uint8_t idx) { *(ctx->block + idx) = 0; } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} //. 
diff --git a/clang/test/CodeGen/sanitize-metadata-ignorelist.c b/clang/test/CodeGen/sanitize-metadata-ignorelist.c index 4dc8c0c35fefe..dac69e97d5a1a 100644 --- a/clang/test/CodeGen/sanitize-metadata-ignorelist.c +++ b/clang/test/CodeGen/sanitize-metadata-ignorelist.c @@ -7,9 +7,9 @@ int y; // ALLOW-LABEL: define {{[^@]+}}@foo -// ALLOW-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections !2 { +// ALLOW-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections !6 { // ALLOW-NEXT: entry: -// ALLOW-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @y, i32 1 monotonic, align 4, !pcsections !4 +// ALLOW-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @y, i32 1 monotonic, align 4, !pcsections !8 // ALLOW-NEXT: ret void // // FUN-LABEL: define {{[^@]+}}@foo @@ -29,15 +29,15 @@ void foo() { } // ALLOW-LABEL: define {{[^@]+}}@bar -// ALLOW-SAME: () local_unnamed_addr #[[ATTR0]] !pcsections !2 { +// ALLOW-SAME: () local_unnamed_addr #[[ATTR0]] !pcsections !6 { // ALLOW-NEXT: entry: -// ALLOW-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @y, i32 2 monotonic, align 4, !pcsections !4 +// ALLOW-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @y, i32 2 monotonic, align 4, !pcsections !8 // ALLOW-NEXT: ret void // // FUN-LABEL: define {{[^@]+}}@bar -// FUN-SAME: () local_unnamed_addr #[[ATTR0]] !pcsections !2 { +// FUN-SAME: () local_unnamed_addr #[[ATTR0]] !pcsections !6 { // FUN-NEXT: entry: -// FUN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @y, i32 2 monotonic, align 4, !pcsections !4 +// FUN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @y, i32 2 monotonic, align 4, !pcsections !8 // FUN-NEXT: ret void // // SRC-LABEL: define {{[^@]+}}@bar diff --git a/clang/test/CodeGen/sanitize-metadata-nosanitize.c b/clang/test/CodeGen/sanitize-metadata-nosanitize.c index 22ed25bd3b670..f2672d7f89157 100644 --- a/clang/test/CodeGen/sanitize-metadata-nosanitize.c +++ b/clang/test/CodeGen/sanitize-metadata-nosanitize.c @@ -12,7 +12,7 @@ //. 
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) // CHECK-LABEL: define dso_local void @escape( -// CHECK-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections [[META2:![0-9]+]] { +// CHECK-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections [[META6:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret void // @@ -23,13 +23,13 @@ __attribute__((noinline, not_tail_called)) void escape(const volatile void *p) { // CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) // CHECK-LABEL: define dso_local i32 @normal_function( -// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !pcsections [[META4:![0-9]+]] { +// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !pcsections [[META8:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]] -// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11:![0-9]+]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA10:![0-9]+]] +// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META13:![0-9]+]] // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // CHECK-NEXT: ret i32 [[TMP0]] // int normal_function(int *x, int *y) { @@ -43,10 +43,10 @@ int normal_function(int *x, int *y) { // CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr 
#[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA10]] // CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4 // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: ret i32 [[TMP0]] // __attribute__((disable_sanitizer_instrumentation)) int test_disable_sanitize_instrumentation(int *x, int *y) { @@ -60,10 +60,10 @@ __attribute__((disable_sanitizer_instrumentation)) int test_disable_sanitize_ins // CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !pcsections [[META14:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]] -// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA10]] +// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META13]] // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: ret i32 [[TMP0]] // __attribute__((no_sanitize("thread"))) int test_no_sanitize_thread(int *x, int *y) { @@ -77,10 +77,10 @@ __attribute__((no_sanitize("thread"))) int test_no_sanitize_thread(int *x, int * // CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] !pcsections [[META14]] { // CHECK-NEXT: 
[[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]] -// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA10]] +// CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META13]] // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: ret i32 [[TMP0]] // __attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) { @@ -97,18 +97,18 @@ __attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) { //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// CHECK: [[META2]] = !{!"sanmd_covered2!C", [[META3:![0-9]+]]} -// CHECK: [[META3]] = !{i64 0} -// CHECK: [[META4]] = !{!"sanmd_covered2!C", [[META5:![0-9]+]]} -// CHECK: [[META5]] = !{i64 3} -// CHECK: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0} -// CHECK: [[META8]] = !{!"any pointer", [[META9:![0-9]+]], i64 0} -// CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0} -// CHECK: [[META10]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[META11]] = !{!"sanmd_atomics2!C"} -// CHECK: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} -// CHECK: [[META13]] = !{!"int", [[META9]], i64 0} +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META6]] = !{!"sanmd_covered2!C", [[META7:![0-9]+]]} +// CHECK: [[META7]] = !{i64 
0} +// CHECK: [[META8]] = !{!"sanmd_covered2!C", [[META9:![0-9]+]]} +// CHECK: [[META9]] = !{i64 3} +// CHECK: [[INTPTR_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK: [[META11]] = !{!"p1 int", [[META12:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"any pointer", [[META4]], i64 0} +// CHECK: [[META13]] = !{!"sanmd_atomics2!C"} // CHECK: [[META14]] = !{!"sanmd_covered2!C", [[META15:![0-9]+]]} // CHECK: [[META15]] = !{i64 2} //. diff --git a/clang/test/CodeGen/tbaa-class.cpp b/clang/test/CodeGen/tbaa-class.cpp index 0ac59085e634d..4193e10afac8e 100644 --- a/clang/test/CodeGen/tbaa-class.cpp +++ b/clang/test/CodeGen/tbaa-class.cpp @@ -242,16 +242,16 @@ uint32_t g14(StructM2 *M, StructS *S) { return S->f16; } +// CHECK: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0} +// CHECK: [[TYPE_i32]] = !{!"int", [[TYPE_char:!.*]], // CHECK: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_cxx_tbaa:!.*]], // CHECK: [[TAG_cxx_tbaa]] = !{!"Simple C++ TBAA"} -// CHECK: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0} -// CHECK: [[TYPE_i32]] = !{!"int", [[TYPE_char]], // CHECK: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0} // CHECK: [[TYPE_i16]] = !{!"short", [[TYPE_char]], -// OLD-PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", ! // OLD-PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0} -// OLD-PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]] +// OLD-PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR:!.*]] +// OLD-PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", ! 
// OLD-PATH: [[TAG_A_f32]] = !{[[TYPE_A:!.*]], [[TYPE_INT]], i64 4} // OLD-PATH: [[TYPE_A]] = !{!"_ZTS7StructA", [[TYPE_SHORT:!.*]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_SHORT]], i64 8, [[TYPE_INT]], i64 12} // OLD-PATH: [[TYPE_SHORT:!.*]] = !{!"short", [[TYPE_CHAR]] @@ -277,9 +277,9 @@ uint32_t g14(StructM2 *M, StructS *S) { // OLD-PATH: [[TYPE_M2]] = !{!"_ZTS8StructM2", [[TYPE_DYN:!.*]], i64 0, [[TYPE_S]], i64 12, [[TYPE_SHORT]], i64 20} // OLD_PATH: [[TYPE_DYN]] = !{!"_ZTS9StructDyn", [[TYPE_INT]], i64 8} +// NEW-PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT:!.*]], i64 0, i64 4} +// NEW-PATH: [[TYPE_INT]] = !{[[TYPE_CHAR:!.*]], i64 4, !"int"} // NEW-PATH: [[TYPE_CHAR:!.*]] = !{!{{.*}}, i64 1, !"omnipotent char"} -// NEW-PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0, i64 4} -// NEW-PATH: [[TYPE_INT]] = !{[[TYPE_CHAR]], i64 4, !"int"} // NEW-PATH: [[TAG_A_f32]] = !{[[TYPE_A:!.*]], [[TYPE_INT]], i64 4, i64 4} // NEW-PATH: [[TYPE_A]] = !{[[TYPE_CHAR]], i64 16, !"_ZTS7StructA", [[TYPE_SHORT:!.*]], i64 0, i64 2, [[TYPE_INT]], i64 4, i64 4, [[TYPE_SHORT]], i64 8, i64 2, [[TYPE_INT]], i64 12, i64 4} // NEW-PATH: [[TYPE_SHORT:!.*]] = !{[[TYPE_CHAR]], i64 2, !"short"} diff --git a/clang/test/CodeGen/tbaa-pointers.c b/clang/test/CodeGen/tbaa-pointers.c index 9cfaa0a47af6e..249cf5634ab11 100644 --- a/clang/test/CodeGen/tbaa-pointers.c +++ b/clang/test/CodeGen/tbaa-pointers.c @@ -1,18 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes -no-pointer-tbaa %s -emit-llvm -o - | FileCheck --check-prefixes=COMMON,DISABLE %s // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck --check-prefixes=COMMON,DEFAULT %s -// RUN: %clang --target=x86_64-apple-darwin -O1 -fno-pointer-tbaa %s -emit-llvm -S -mllvm -disable-llvm-optzns -o - | FileCheck --check-prefixes=COMMON,DISABLE %s -// RUN: %clang 
--target=x86_64-apple-darwin -O1 %s -emit-llvm -S -mllvm -disable-llvm-optzns -o - | FileCheck --check-prefixes=COMMON,DEFAULT %s void p2unsigned(unsigned **ptr) { // COMMON-LABEL: define void @p2unsigned( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P2INT_0:!.+]] - // DEFAULT-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P2INT_0]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE]], align 8, !tbaa [[P1INT_0:!.+]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR:!.+]] - // DISABLE-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: store ptr null, ptr [[BASE]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA10:![0-9]+]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: store ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // *ptr = 0; @@ -22,12 +21,12 @@ void p2unsigned_volatile(unsigned *volatile *ptr) { // COMMON-LABEL: define void @p2unsigned_volatile( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P2INT_0]] - // DEFAULT-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P2INT_0]] - // DEFAULT-NEXT: store volatile ptr null, ptr [[BASE]], align 8, !tbaa [[P1INT_0]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // 
DISABLE-NEXT: store volatile ptr null, ptr [[BASE]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DEFAULT-NEXT: store volatile ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA10]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: store volatile ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // *ptr = 0; @@ -37,14 +36,14 @@ void p3int(int ***ptr) { // COMMON-LABEL: define void @p3int( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P3INT_0:!.+]] - // DEFAULT-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P3INT_0]] - // DEFAULT-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[P2INT_0]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE_1]], align 8, !tbaa [[P1INT_0]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: store ptr null, ptr [[BASE_1]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA12]] + // DEFAULT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: store ptr null, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // **ptr = 0; @@ -54,16 +53,16 @@ void p4char(char ****ptr) { // COMMON-LABEL: define void @p4char( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P4CHAR_0:!.+]] - // DEFAULT-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P4CHAR_0]] - // DEFAULT-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[P3CHAR_0:!.+]] - // DEFAULT-NEXT: [[BASE_2:%.+]] = load ptr, ptr [[BASE_1]], align 8, !tbaa [[P2CHAR_0:!.+]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE_2]], align 8, !tbaa [[P1CHAR_0:!.+]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_2:%.+]] = load ptr, ptr [[BASE_1]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: store ptr null, ptr [[BASE_2]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA15:![0-9]+]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA15]] + // DEFAULT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18:![0-9]+]] + // DEFAULT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA20:![0-9]+]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP2]], align 8, !tbaa [[TBAA22:![0-9]+]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa 
[[TBAA6]] + // DISABLE-NEXT: store ptr null, ptr [[TMP2]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // ***ptr = 0; @@ -73,16 +72,16 @@ void p4char_const1(const char ****ptr) { // COMMON-LABEL: define void @p4char_const1( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P4CHAR_0]] - // DEFAULT-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P4CHAR_0]] - // DEFAULT-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[P3CHAR_0]] - // DEFAULT-NEXT: [[BASE_2:%.+]] = load ptr, ptr [[BASE_1]], align 8, !tbaa [[P2CHAR_0]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE_2]], align 8, !tbaa [[P1CHAR_0]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_2:%.+]] = load ptr, ptr [[BASE_1]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: store ptr null, ptr [[BASE_2]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA15]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA15]] + // DEFAULT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18]] + // DEFAULT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA20]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP2]], align 8, !tbaa [[TBAA22]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: store ptr null, ptr [[TMP2]], align 8, !tbaa 
[[TBAA6]] // COMMON-NEXT: ret void // ***ptr = 0; @@ -92,16 +91,16 @@ void p4char_const2(const char **const **ptr) { // COMMON-LABEL: define void @p4char_const2( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P4CHAR_0]] - // DEFAULT-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P4CHAR_0]] - // DEFAULT-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[P3CHAR_0]] - // DEFAULT-NEXT: [[BASE_2:%.+]] = load ptr, ptr [[BASE_1]], align 8, !tbaa [[P2CHAR_0]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE_2]], align 8, !tbaa [[P1CHAR_0]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_1:%.+]] = load ptr, ptr [[BASE_0]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE_2:%.+]] = load ptr, ptr [[BASE_1]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: store ptr null, ptr [[BASE_2]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA15]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA15]] + // DEFAULT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18]] + // DEFAULT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA20]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP2]], align 8, !tbaa [[TBAA22]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: store ptr null, ptr [[TMP2]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // ***ptr = 0; @@ -116,12 +115,12 @@ 
void p2struct(struct S1 **ptr) { // COMMON-LABEL: define void @p2struct( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P2S1_TAG:!.+]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DEFAULT-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P2S1_TAG]] - // DISABLE-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE]], align 8, !tbaa [[P1S1_TAG:!.+]] - // DISABLE-NEXT: store ptr null, ptr [[BASE]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA24:![0-9]+]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA24]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] + // DISABLE-NEXT: store ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // *ptr = 0; @@ -131,10 +130,12 @@ void p2struct_const(struct S1 const **ptr) { // COMMON-LABEL: define void @p2struct_const( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // COMMON-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR:!.+]] - // COMMON-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DEFAULT-NEXT: store ptr null, ptr [[BASE]], align 8, !tbaa [[P1S1_TAG]] - // DISABLE-NEXT: store ptr null, ptr [[BASE]], align 8, !tbaa [[ANYPTR]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA24]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA24]] + // DEFAULT-NEXT: store ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA26]] + // 
DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: store ptr null, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] // COMMON-NEXT: ret void // *ptr = 0; @@ -148,14 +149,14 @@ void p2struct2(struct S2 *ptr) { // COMMON-LABEL: define void @p2struct2( // COMMON-SAME: ptr noundef [[PTR:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 - // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P1S2_TAG:!.+]] - // DEFAULT-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P1S2_TAG]] - // DEFAULT-NEXT: [[S:%.+]] = getelementptr inbounds nuw %struct.S2, ptr [[BASE]], i32 0, i32 0 - // DEFAULT-NEXT: store ptr null, ptr [[S]], align 8, !tbaa [[S2_S_TAG:!.+]] - // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[BASE:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] - // DISABLE-NEXT: [[S:%.+]] = getelementptr inbounds nuw %struct.S2, ptr [[BASE]], i32 0, i32 0 - // DISABLE-NEXT: store ptr null, ptr [[S]], align 8, !tbaa [[S2_S_TAG:!.+]] + // DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA28:![0-9]+]] + // DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA28]] + // DEFAULT-NEXT: [[S:%.*]] = getelementptr inbounds nuw [[STRUCT_S2:%.*]], ptr [[TMP0]], i32 0, i32 0 + // DEFAULT-NEXT: store ptr null, ptr [[S]], align 8, !tbaa [[TBAA30:![0-9]+]] + // DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] + // DISABLE-NEXT: [[S:%.*]] = getelementptr inbounds nuw [[STRUCT_S2:%.*]], ptr [[TMP0]], i32 0, i32 0 + // DISABLE-NEXT: store ptr null, ptr [[S]], align 8, !tbaa [[TBAA8:![0-9]+]] // COMMON-NEXT: ret void ptr->s = 0; } @@ -167,21 +168,21 @@ void vla1(int n, int ptr[][n], int idx) { // COMMON: 
[[N_ADDR:%.+]] = alloca i32, align 4 // COMMON-NEXT: [[PTR_ADDR:%.+]] = alloca ptr, align 8 // COMMON-NEXT: [[IDX_ADDR:%.+]] = alloca i32, align 4 -// COMMON-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4, !tbaa [[INT_TY:!.+]] -// DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[P1INT0:!.+]] -// DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// COMMON-NEXT: store i32 [[IDX]], ptr [[IDX_ADDR]], align 4, !tbaa [[INT_TY]] -// COMMON-NEXT: [[L:%.+]] = load i32, ptr [[N_ADDR]], align 4, !tbaa [[INT_TY]] -// COMMON-NEXT: [[L_EXT:%.+]] = zext i32 [[L]] to i64 -// DEFAULT-NEXT: [[L_PTR:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[P1INT0]] -// DISABLE-NEXT: [[L_PTR:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// COMMON-NEXT: [[L_IDX:%.+]] = load i32, ptr [[IDX_ADDR]], align 4, !tbaa [[INT_TY]] -// COMMON-NEXT: [[IDX_EXT:%.+]] = sext i32 [[L_IDX]] to i64 -// COMMON-NEXT: [[MUL:%.+]] = mul nsw i64 [[IDX_EXT]], [[L_EXT]] -// COMMON-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, ptr [[L_PTR]], i64 [[MUL]] -// COMMON-NEXT: [[GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP1]], i64 0 -// COMMON-NEXT: store i32 0, ptr [[GEP2]], align 4, !tbaa [[INT_TAG:!.+]] -// DEFAULT-NEXT: ret void +// COMMON-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4, !tbaa [[TBAA2:![0-9]+]] +// DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA10]] +// DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] +// COMMON-NEXT: store i32 [[IDX]], ptr [[IDX_ADDR]], align 4, !tbaa [[TBAA2]] +// COMMON-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4, !tbaa [[TBAA2]] +// COMMON-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// DEFAULT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA10]] +// DISABLE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] +// COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[IDX_ADDR]], align 4, !tbaa [[TBAA2]] +// 
COMMON-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64 +// COMMON-NEXT: [[TMP4:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] +// COMMON-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP4]] +// COMMON-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// COMMON-NEXT: store i32 0, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]] +// COMMON-NEXT: ret void ptr[idx][0] = 0; } @@ -194,11 +195,13 @@ void unamed_struct_typedef(TypedefS *ptr) { // COMMON-LABEL: define void @unamed_struct_typedef( // COMMON-SAME: ptr noundef [[PTRA:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 -// DISABLE-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// DEFAULT-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR:!.+]] -// COMMON-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// COMMON-NEXT: [[GEP:%.+]] = getelementptr inbounds nuw %struct.TypedefS, ptr [[L0]], i32 0, i32 0 -// COMMON-NEXT: store i32 0, ptr [[GEP]], align 4 +// DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] +// DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA32:![0-9]+]] +// DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA32]] +// COMMON-NEXT: [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_TYPEDEFS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// DISABLE-NEXT: store i32 0, ptr [[I1]], align 4, !tbaa [[TBAA10:![0-9]+]] +// DEFAULT-NEXT: store i32 0, ptr [[I1]], align 4, !tbaa [[TBAA33:![0-9]+]] // COMMON-NEXT: ret void ptr->i1 = 0; @@ -208,52 +211,62 @@ int void_ptrs(void **ptr) { // COMMON-LABEL: define i32 @void_ptrs( // COMMON-SAME: ptr noundef [[PTRA:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 -// DISABLE-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// DEFAULT-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], 
align 8, !tbaa [[ANYP2:!.+]] -// DISABLE-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// DEFAULT-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2]] -// COMMON-NEXT: [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa [[ANYPTR]] -// COMMON-NEXT: [[BOOL:%.+]] = icmp ne ptr [[L1]], null -// COMMON-NEXT: [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64 -// COMMON-NEXT: [[COND:%.+]] = select i1 [[BOOL]], i32 0, i32 1 +// DISABLE-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] +// DEFAULT-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA35:![0-9]+]] +// DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA6]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[TBAA35]] +// DISABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] +// DEFAULT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA32]] +// COMMON-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP1]], null +// COMMON-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL]] to i64 +// COMMON-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 0, i32 1 // COMMON-NEXT: ret i32 [[COND]] return *ptr ? 
0 : 1; } -// DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0} -// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_P2_POINTER:!.+]], i64 0} -// DEFAULT: [[ANY_P2_POINTER]] = !{!"any p2 pointer", [[ANY_POINTER:!.+]], i64 0} -// DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0} -// COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0} -// COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0} -// COMMON: [[TBAA_ROOT]] = !{!"Simple C/C++ TBAA"} -// DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0} -// DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0} -// DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0} -// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_P3_POINTER:!.+]], i64 0} -// DEFAULT: [[ANY_P3_POINTER]] = !{!"any p3 pointer", [[ANY_P2_POINTER]], i64 0} -// DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0} -// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_P4_POINTER:!.*]], i64 0} -// DEFAULT: [[ANY_P4_POINTER]] = !{!"any p4 pointer", [[ANY_P3_POINTER]], i64 0} -// DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0} -// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_P3_POINTER]], i64 0} -// DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0} -// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_P2_POINTER]], i64 0} -// DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0} -// DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0} -// DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0} -// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_P2_POINTER]], i64 0} -// DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0} -// DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0} -// DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0} -// DEFAULT: [[P1S2]] = !{!"p1 _ZTS2S2", [[ANY_POINTER]], i64 0} - -// DEFAULT: [[S2_S_TAG]] = !{[[S2_TY:!.+]], [[P1S1]], i64 0} -// DEFAULT: [[S2_TY]] = !{!"S2", [[P1S1]], i64 0} -// DISABLE: 
[[S2_S_TAG]] = !{[[S2_TY:!.+]], [[ANY_POINTER]], i64 0} -// DISABLE: [[S2_TY]] = !{!"S2", [[ANY_POINTER]], i64 0} -// COMMON: [[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0} -// COMMON: [[INT_TY]] = !{!"int", [[CHAR]], i64 0} -// DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]], [[ANY_POINTER]], i64 0} -// DEFAULT: [[ANYP2]] = !{[[ANY_P2_POINTER]], [[ANY_P2_POINTER]], i64 0} +// DISABLE: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// DISABLE: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// DISABLE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// DISABLE: [[META5]] = !{!"Simple C/C++ TBAA"} +// DISABLE: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// DISABLE: [[META7]] = !{!"any pointer", [[META4]], i64 0} +// DISABLE: [[TBAA8]] = !{[[META9:![0-9]+]], [[META7]], i64 0} +// DISABLE: [[META9]] = !{!"S2", [[META7]], i64 0} +// DISABLE: [[TBAA10]] = !{[[META11:![0-9]+]], [[META3]], i64 0} +// DISABLE: [[META11]] = !{!"", [[META3]], i64 0} +// DEFAULT: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// DEFAULT: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// DEFAULT: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// DEFAULT: [[META5]] = !{!"Simple C/C++ TBAA"} +// DEFAULT: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// DEFAULT: [[META7]] = !{!"p2 int", [[META8:![0-9]+]], i64 0} +// DEFAULT: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} +// DEFAULT: [[META9]] = !{!"any pointer", [[META4]], i64 0} +// DEFAULT: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// DEFAULT: [[META11]] = !{!"p1 int", [[META9]], i64 0} +// DEFAULT: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// DEFAULT: [[META13]] = !{!"p3 int", [[META14:![0-9]+]], i64 0} +// DEFAULT: [[META14]] = !{!"any p3 pointer", [[META8]], i64 0} +// DEFAULT: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// DEFAULT: [[META16]] = !{!"p4 omnipotent char", [[META17:![0-9]+]], i64 0} +// DEFAULT: [[META17]] = !{!"any p4 pointer", 
[[META14]], i64 0} +// DEFAULT: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// DEFAULT: [[META19]] = !{!"p3 omnipotent char", [[META14]], i64 0} +// DEFAULT: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +// DEFAULT: [[META21]] = !{!"p2 omnipotent char", [[META8]], i64 0} +// DEFAULT: [[TBAA22]] = !{[[META23:![0-9]+]], [[META23]], i64 0} +// DEFAULT: [[META23]] = !{!"p1 omnipotent char", [[META9]], i64 0} +// DEFAULT: [[TBAA24]] = !{[[META25:![0-9]+]], [[META25]], i64 0} +// DEFAULT: [[META25]] = !{!"p2 _ZTS2S1", [[META8]], i64 0} +// DEFAULT: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} +// DEFAULT: [[META27]] = !{!"p1 _ZTS2S1", [[META9]], i64 0} +// DEFAULT: [[TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0} +// DEFAULT: [[META29]] = !{!"p1 _ZTS2S2", [[META9]], i64 0} +// DEFAULT: [[TBAA30]] = !{[[META31:![0-9]+]], [[META27]], i64 0} +// DEFAULT: [[META31]] = !{!"S2", [[META27]], i64 0} +// DEFAULT: [[TBAA32]] = !{[[META9]], [[META9]], i64 0} +// DEFAULT: [[TBAA33]] = !{[[META34:![0-9]+]], [[META3]], i64 0} +// DEFAULT: [[META34]] = !{!"", [[META3]], i64 0} +// DEFAULT: [[TBAA35]] = !{[[META8]], [[META8]], i64 0} +//. 
diff --git a/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp index 1177691ca511e..0775f9996f1b2 100644 --- a/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp +++ b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp @@ -18,18 +18,18 @@ struct NamedBitfields { // CHECK-LABEL: _Z4copyP14NamedBitfieldsS0_ // CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 16)) [[A1:%.*]], ptr noundef readonly captures(none) [[A2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[A1]], ptr noundef nonnull align 8 dereferenceable(16) [[A2]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT2:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[A1]], ptr noundef nonnull align 8 dereferenceable(16) [[A2]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT6:![0-9]+]] // CHECK-NEXT: ret void // void copy(NamedBitfields *a1, NamedBitfields *a2) { *a1 = *a2; } -// CHECK: [[TBAA_STRUCT2]] = !{i64 0, i64 4, [[META3:![0-9]+]], i64 4, i64 4, [[META6:![0-9]+]], i64 8, i64 8, [[META8:![0-9]+]]} -// CHECK: [[META3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META2:![0-9]+]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C++ TBAA"} -// CHECK: [[META6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK: [[META7]] = !{!"int", [[META4]], i64 0} +// CHECK: [[TBAA_STRUCT6]] = !{i64 0, i64 4, [[META7:![0-9]+]], i64 4, i64 4, [[META2:![0-9]+]], i64 8, i64 8, [[META8:![0-9]+]]} +// CHECK: [[META7]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} // CHECK: [[META9]] = !{!"double", [[META4]], i64 0} diff --git 
a/clang/test/CodeGen/tbaa-struct.cpp b/clang/test/CodeGen/tbaa-struct.cpp index ca076ce5aa273..48f84928d93a5 100644 --- a/clang/test/CodeGen/tbaa-struct.cpp +++ b/clang/test/CodeGen/tbaa-struct.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 %s | \ // RUN: FileCheck -check-prefixes=CHECK,CHECK-OLD %s // RUN: %clang_cc1 -triple x86_64-apple-darwin -new-struct-path-tbaa \ @@ -18,9 +19,9 @@ typedef A __attribute__((may_alias)) AA; void copy(A *a1, A *a2) { // CHECK-LABEL: _Z4copyP1AS0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) %{{.*}}, ptr noundef nonnull align 4 dereferenceable(16) %{{.*}}, i64 16, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_A:![0-9]*]] - *a1 = *a2; +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT6:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA6:![0-9]+]] +*a1 = *a2; } struct B { @@ -30,10 +31,10 @@ struct B { }; void copy2(B *b1, B *b2) { -// CHECK-LABEL: _Z5copy2P1BS0_ +// CHECK-LABEL: _Z5copy2P1BS0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) %{{.*}}, ptr noundef nonnull align 4 dereferenceable(24) %{{.*}}, i64 24, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS2:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_B:![0-9]*]] +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA12:![0-9]+]] *b1 = *b2; } @@ -50,9 +51,9 @@ union U { void copy3(U *u1, U *u2) { // CHECK-LABEL: _Z5copy3P1US0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(12) %{{.*}}, ptr noundef nonnull align 4 dereferenceable(12) %{{.*}}, i64 12, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS3:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_U:![0-9]*]] - *u1 = *u2; +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT11:![0-9]+]] +// CHECK-NEW-SAME: !tbaa 
[[TBAA15:![0-9]+]] +*u1 = *u2; } // Make sure that zero-length bitfield works. @@ -66,8 +67,8 @@ struct C { void copy4(C *c1, C *c2) { // CHECK-LABEL: _Z5copy4P1CS0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) {{.*}}, ptr noundef nonnull align 1 dereferenceable(3) {{.*}}, i64 3, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS4:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_C:![0-9]*]] +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT12:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA17:![0-9]+]] *c1 = *c2; } @@ -81,24 +82,24 @@ struct D { void copy5(D *d1, D *d2) { // CHECK-LABEL: _Z5copy5P1DS0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(6) {{.*}}, ptr noundef nonnull align 1 dereferenceable(6) {{.*}}, i64 6, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS5:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_D:![0-9]*]] +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT13:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA20:![0-9]+]] *d1 = *d2; } void copy6(AA *a1, A *a2) { // CHECK-LABEL: _Z5copy6P1AS0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) %{{.*}}, ptr noundef nonnull align 4 dereferenceable(16) %{{.*}}, i64 16, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS]] -// CHECK-NEW-SAME: !tbaa [[TAG_char:![0-9]*]] +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEW-SAME: !tbaa [[TBAA23:![0-9]+]] *a1 = *a2; } void copy7(A *a1, AA *a2) { // CHECK-LABEL: _Z5copy7P1AS0_ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) %{{.*}}, ptr noundef nonnull align 4 dereferenceable(16) %{{.*}}, i64 16, i1 false) -// CHECK-OLD-SAME: !tbaa.struct [[TS]] -// CHECK-NEW-SAME: !tbaa [[TAG_char]] +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEW-SAME: !tbaa [[TBAA23]] *a1 = *a2; } @@ -112,8 +113,8 @@ struct NamedBitfields { void copy8(NamedBitfields *a1, NamedBitfields *a2) { // CHECK-LABEL: _Z5copy8P14NamedBitfieldsS0_ // 
CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), -// CHECK-OLD-SAME: !tbaa.struct [[TS6:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_NamedBitfields:!.+]], !tbaa.struct +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT14:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA24:![0-9]+]], !tbaa.struct *a1 = *a2; } @@ -129,8 +130,8 @@ struct NamedBitfields2 { void copy9(NamedBitfields2 *a1, NamedBitfields2 *a2) { // CHECK-LABEL: _Z5copy9P15NamedBitfields2S0_ // CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(24) %a1, ptr noundef nonnull align 8 dereferenceable(24) %a2, i64 24, i1 false), -// CHECK-OLD-SAME: !tbaa.struct [[TS7:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_NamedBitfields2:!.+]], !tbaa.struct +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT17:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA30:![0-9]+]], !tbaa.struct *a1 = *a2; } @@ -146,8 +147,8 @@ struct NamedBitfields3 { void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { // CHECK-LABEL: _Z6copy10P15NamedBitfields3S0_ // CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), -// CHECK-OLD-SAME: !tbaa.struct [[TS8:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_NamedBitfields3:!.+]], !tbaa.struct +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT18:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA33:![0-9]+]], !tbaa.struct *a1 = *a2; } @@ -164,8 +165,8 @@ struct UnionMember1 { void copy11(UnionMember1 *a1, UnionMember1 *a2) { // CHECK-LABEL: _Z6copy11P12UnionMember1S0_ // CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), -// CHECK-OLD-SAME: !tbaa.struct [[TS9:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_UnionMember1:!.+]], !tbaa.struct +// CHECK-OLD-SAME: !tbaa.struct 
[[TBAA_STRUCT19:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA37:![0-9]+]], !tbaa.struct *a1 = *a2; } @@ -177,52 +178,61 @@ struct UnionMember2 { void copy12(UnionMember2 *a1, UnionMember2 *a2) { // CHECK-LABEL: _Z6copy12P12UnionMember2S0_ // CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), -// CHECK-OLD-SAME: !tbaa.struct [[TS10:!.*]] -// CHECK-NEW-SAME: !tbaa [[TAG_UnionMember2:!.+]], !tbaa.struct +// CHECK-OLD-SAME: !tbaa.struct [[TBAA_STRUCT20:![0-9]+]] +// CHECK-NEW-SAME: !tbaa [[TBAA41:![0-9]+]], !tbaa.struct *a1 = *a2; } -// CHECK-OLD: [[TS]] = !{i64 0, i64 2, !{{.*}}, i64 4, i64 4, !{{.*}}, i64 8, i64 1, !{{.*}}, i64 12, i64 4, !{{.*}}} -// CHECK-OLD: [[CHAR:!.*]] = !{!"omnipotent char", !{{.*}}} -// CHECK-OLD: [[TAG_INT:!.*]] = !{[[INT:!.*]], [[INT]], i64 0} -// CHECK-OLD: [[INT]] = !{!"int", [[CHAR]] -// CHECK-OLD: [[TAG_CHAR:!.*]] = !{[[CHAR]], [[CHAR]], i64 0} // (offset, size) = (0,1) char; (4,2) short; (8,4) int; (12,1) char; (16,4) int; (20,4) int -// CHECK-OLD: [[TS2]] = !{i64 0, i64 1, !{{.*}}, i64 4, i64 2, !{{.*}}, i64 8, i64 4, !{{.*}}, i64 12, i64 1, !{{.*}}, i64 16, i64 4, {{.*}}, i64 20, i64 4, {{.*}}} // (offset, size) = (0,8) char; (0,2) char; (4,8) char -// CHECK-OLD: [[TS3]] = !{i64 0, i64 12, [[TAG_CHAR]]} -// CHECK-OLD: [[TS4]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 1, i64 1, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]]} -// CHECK-OLD: [[TS5]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 4, i64 1, [[TAG_CHAR]], i64 5, i64 1, [[TAG_CHAR]]} -// CHECK-OLD: [[TS6]] = !{i64 0, i64 2, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]], i64 8, i64 8, [[TAG_DOUBLE:!.+]]} -// CHECK-OLD: [[TAG_DOUBLE]] = !{[[DOUBLE:!.+]], [[DOUBLE]], i64 0} // CHECK-OLD [[DOUBLE]] = !{!"double", [[CHAR]], i64 0} -// CHECK-OLD: [[TS7]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 1, i64 1, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]], i64 3, i64 2, [[TAG_CHAR]], i64 8, i64 8, 
[[TAG_DOUBLE]], i64 16, i64 1, [[TAG_CHAR]]} -// CHECK-OLD: [[TS8]] = !{i64 0, i64 4, [[TAG_CHAR]], i64 8, i64 8, [[TAG_DOUBLE]]} -// CHECK-OLD: [[TS9]] = !{i64 0, i64 8, [[TAG_CHAR]], i64 8, i64 4, [[TAG_INT]]} -// CHECK-OLD: [[TS10]] = !{i64 0, i64 4, [[TAG_INT]], i64 8, i64 8, [[TAG_CHAR]]} - -// CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"} -// CHECK-NEW-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 0} -// CHECK-NEW-DAG: [[TYPE_short:!.*]] = !{[[TYPE_char]], i64 2, !"short"} -// CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"} -// CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1A", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4, [[TYPE_char]], i64 8, i64 1, [[TYPE_int]], i64 12, i64 4} -// CHECK-NEW-DAG: [[TAG_A]] = !{[[TYPE_A]], [[TYPE_A]], i64 0, i64 16} -// CHECK-NEW-DAG: [[TYPE_B:!.*]] = !{[[TYPE_char]], i64 24, !"_ZTS1B", [[TYPE_char]], i64 0, i64 1, [[TYPE_A]], i64 4, i64 16, [[TYPE_int]], i64 20, i64 4} -// CHECK-NEW-DAG: [[TAG_B]] = !{[[TYPE_B]], [[TYPE_B]], i64 0, i64 24} -// CHECK-NEW-DAG: [[TAG_U]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 12} -// CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 3, !"_ZTS1C", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1} -// CHECK-NEW-DAG: [[TAG_C]] = !{[[TYPE_C]], [[TYPE_C]], i64 0, i64 3} -// CHECK-NEW-DAG: [[TYPE_D:!.*]] = !{[[TYPE_char]], i64 6, !"_ZTS1D", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 4, i64 1, [[TYPE_char]], i64 5, i64 1} -// CHECK-NEW-DAG: [[TAG_D]] = !{[[TYPE_D]], [[TYPE_D]], i64 0, i64 6} -// CHECK-NEW-DAG: [[TAG_NamedBitfields]] = !{[[TYPE_NamedBitfields:!.+]], [[TYPE_NamedBitfields]], i64 0, i64 16} -// CHECK-NEW-DAG: [[TYPE_NamedBitfields]] = !{[[TYPE_char]], i64 16, !"_ZTS14NamedBitfields", [[TYPE_int]], i64 0, i64 4, [[TYPE_int]], i64 1, i64 4, [[TYPE_char]], i64 2, i64 1, [[TYPE_double:!.+]], i64 8, i64 8} -// CHECK-NEW-DAG: [[TYPE_double]] = 
!{[[TYPE_char]], i64 8, !"double"} -// CHECK-NEW-DAG: [[TAG_NamedBitfields2]] = !{[[TYPE_NamedBitfields2:!.+]], [[TYPE_NamedBitfields2]], i64 0, i64 24} -// CHECK-NEW-DAG: [[TYPE_NamedBitfields2]] = !{[[TYPE_char]], i64 24, !"_ZTS15NamedBitfields2", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1, [[TYPE_int]], i64 3, i64 4, [[TYPE_int]], i64 3, i64 4, [[TYPE_char]], i64 4, i64 1, [[TYPE_double]], i64 8, i64 8, [[TYPE_int]], i64 16, i64 4} -// CHECK-NEW-DAG: [[TAG_NamedBitfields3]] = !{[[TYPE_NamedBitfields3:!.+]], [[TYPE_NamedBitfields3]], i64 0, i64 16} -// CHECK-NEW-DAG: [[TYPE_NamedBitfields3]] = !{[[TYPE_char]], i64 16, !"_ZTS15NamedBitfields3", [[TYPE_int]], i64 1, i64 4, [[TYPE_int]], i64 2, i64 4, [[TYPE_double]], i64 8, i64 8} -// CHECK-NEW-DAG: [[TAG_UnionMember1]] = !{[[TYPE_UnionMember1:!.+]], [[TYPE_UnionMember1]], i64 0, i64 16} -// CHECK-NEW-DAG: [[TYPE_UnionMember1]] = !{[[TYPE_char]], i64 16, !"_ZTS12UnionMember1", [[TYPE_char]], i64 0, i64 8, [[TYPE_int]], i64 8, i64 4} -// CHECK-NEW-DAG: [[TAG_UnionMember2]] = !{[[TYPE_UnionMember2:!.+]], [[TYPE_UnionMember2]], i64 0, i64 16} -// CHECK-NEW-DAG: [[TYPE_UnionMember2]] = !{[[TYPE_char]], i64 16, !"_ZTS12UnionMember2", [[TYPE_int]], i64 0, i64 4, [[TYPE_char]], i64 8, i64 8} + +//. 
+// CHECK-OLD: [[META2:![0-9]+]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-OLD: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-OLD: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-OLD: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-OLD: [[TBAA_STRUCT6]] = !{i64 0, i64 2, [[META7:![0-9]+]], i64 4, i64 4, [[META2]], i64 8, i64 1, [[META9:![0-9]+]], i64 12, i64 4, [[META2]]} +// CHECK-OLD: [[META7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK-OLD: [[META8]] = !{!"short", [[META4]], i64 0} +// CHECK-OLD: [[META9]] = !{[[META4]], [[META4]], i64 0} +// CHECK-OLD: [[TBAA_STRUCT10]] = !{i64 0, i64 1, [[META9]], i64 4, i64 2, [[META7]], i64 8, i64 4, [[META2]], i64 12, i64 1, [[META9]], i64 16, i64 4, [[META2]], i64 20, i64 4, [[META2]]} +// CHECK-OLD: [[TBAA_STRUCT11]] = !{i64 0, i64 12, [[META9]]} +// CHECK-OLD: [[TBAA_STRUCT12]] = !{i64 0, i64 1, [[META9]], i64 1, i64 1, [[META9]], i64 2, i64 1, [[META9]]} +// CHECK-OLD: [[TBAA_STRUCT13]] = !{i64 0, i64 1, [[META9]], i64 4, i64 1, [[META9]], i64 5, i64 1, [[META9]]} +// CHECK-OLD: [[TBAA_STRUCT14]] = !{i64 0, i64 2, [[META9]], i64 2, i64 1, [[META9]], i64 8, i64 8, [[META15:![0-9]+]]} +// CHECK-OLD: [[META15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK-OLD: [[META16]] = !{!"double", [[META4]], i64 0} +// CHECK-OLD: [[TBAA_STRUCT17]] = !{i64 0, i64 1, [[META9]], i64 1, i64 1, [[META9]], i64 2, i64 1, [[META9]], i64 3, i64 2, [[META9]], i64 8, i64 8, [[META15]], i64 16, i64 1, [[META9]]} +// CHECK-OLD: [[TBAA_STRUCT18]] = !{i64 0, i64 4, [[META9]], i64 8, i64 8, [[META15]]} +// CHECK-OLD: [[TBAA_STRUCT19]] = !{i64 0, i64 8, [[META9]], i64 8, i64 4, [[META2]]} +// CHECK-OLD: [[TBAA_STRUCT20]] = !{i64 0, i64 4, [[META2]], i64 8, i64 8, [[META9]]} +//. 
+// CHECK-NEW: [[META2:![0-9]+]] = !{[[META3:![0-9]+]], [[META3]], i64 0, i64 4} +// CHECK-NEW: [[META3]] = !{[[META4:![0-9]+]], i64 4, !"int"} +// CHECK-NEW: [[META4]] = !{[[META5:![0-9]+]], i64 1, !"omnipotent char"} +// CHECK-NEW: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-NEW: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0, i64 16} +// CHECK-NEW: [[META7]] = !{[[META4]], i64 16, !"_ZTS1A", [[META8:![0-9]+]], i64 0, i64 2, [[META3]], i64 4, i64 4, [[META4]], i64 8, i64 1, [[META3]], i64 12, i64 4} +// CHECK-NEW: [[META8]] = !{[[META4]], i64 2, !"short"} +// CHECK-NEW: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0, i64 24} +// CHECK-NEW: [[META13]] = !{[[META4]], i64 24, !"_ZTS1B", [[META4]], i64 0, i64 1, [[META7]], i64 4, i64 16, [[META3]], i64 20, i64 4} +// CHECK-NEW: [[TBAA15]] = !{[[META4]], [[META4]], i64 0, i64 12} +// CHECK-NEW: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0, i64 3} +// CHECK-NEW: [[META18]] = !{[[META4]], i64 3, !"_ZTS1C", [[META4]], i64 0, i64 1, [[META4]], i64 1, i64 1, [[META4]], i64 2, i64 1} +// CHECK-NEW: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0, i64 6} +// CHECK-NEW: [[META21]] = !{[[META4]], i64 6, !"_ZTS1D", [[META4]], i64 0, i64 1, [[META4]], i64 4, i64 1, [[META4]], i64 5, i64 1} +// CHECK-NEW: [[TBAA23]] = !{[[META4]], [[META4]], i64 0, i64 0} +// CHECK-NEW: [[TBAA24]] = !{[[META25:![0-9]+]], [[META25]], i64 0, i64 16} +// CHECK-NEW: [[META25]] = !{[[META4]], i64 16, !"_ZTS14NamedBitfields", [[META3]], i64 0, i64 4, [[META3]], i64 1, i64 4, [[META4]], i64 2, i64 1, [[META26:![0-9]+]], i64 8, i64 8} +// CHECK-NEW: [[META26]] = !{[[META4]], i64 8, !"double"} +// CHECK-NEW: [[TBAA30]] = !{[[META31:![0-9]+]], [[META31]], i64 0, i64 24} +// CHECK-NEW: [[META31]] = !{[[META4]], i64 24, !"_ZTS15NamedBitfields2", [[META4]], i64 0, i64 1, [[META4]], i64 1, i64 1, [[META4]], i64 2, i64 1, [[META3]], i64 3, i64 4, [[META3]], i64 3, i64 4, [[META4]], i64 4, i64 1, [[META26]], i64 8, i64 8, [[META3]], i64 16, 
i64 4} +// CHECK-NEW: [[TBAA33]] = !{[[META34:![0-9]+]], [[META34]], i64 0, i64 16} +// CHECK-NEW: [[META34]] = !{[[META4]], i64 16, !"_ZTS15NamedBitfields3", [[META3]], i64 1, i64 4, [[META3]], i64 2, i64 4, [[META26]], i64 8, i64 8} +// CHECK-NEW: [[TBAA37]] = !{[[META38:![0-9]+]], [[META38]], i64 0, i64 16} +// CHECK-NEW: [[META38]] = !{[[META4]], i64 16, !"_ZTS12UnionMember1", [[META4]], i64 0, i64 8, [[META3]], i64 8, i64 4} +// CHECK-NEW: [[TBAA41]] = !{[[META42:![0-9]+]], [[META42]], i64 0, i64 16} +// CHECK-NEW: [[META42]] = !{[[META4]], i64 16, !"_ZTS12UnionMember2", [[META3]], i64 0, i64 4, [[META4]], i64 8, i64 8} +//. diff --git a/clang/test/CodeGen/tbaa.c b/clang/test/CodeGen/tbaa.c index 0ab81f60a7194..a719c0398e79b 100644 --- a/clang/test/CodeGen/tbaa.c +++ b/clang/test/CodeGen/tbaa.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -no-struct-path-tbaa -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=PATH // RUN: %clang_cc1 -triple x86_64-apple-darwin -O0 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefix=NO-TBAA @@ -37,13 +38,13 @@ typedef enum : uint8_t { uint32_t g0(EnumAuto32 *E, uint32_t *val) { // CHECK-LABEL: define{{.*}} i32 @g0( -// CHECK: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] -// CHECK: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: load i32, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TBAA2:![0-9]+]] +// CHECK: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: load i32, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @g0( -// PATH: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] -// PATH: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// PATH: 
load i32, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// PATH: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TBAA2:![0-9]+]] +// PATH: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// PATH: load i32, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] *val = 5; *E = RED_AUTO_32; return *val; @@ -51,13 +52,13 @@ uint32_t g0(EnumAuto32 *E, uint32_t *val) { uint64_t g1(EnumAuto64 *E, uint64_t *val) { // CHECK-LABEL: define{{.*}} i64 @g1( -// CHECK: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TAG_i64:!.*]] -// CHECK: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TAG_long:!.*]] -// CHECK: load i64, ptr %{{.*}}, align 8, !tbaa [[TAG_i64]] +// CHECK: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TBAA14:![0-9]+]] +// CHECK: load i64, ptr %{{.*}}, align 8, !tbaa [[TBAA12]] // PATH-LABEL: define{{.*}} i64 @g1( -// PATH: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TAG_i64:!.*]] -// PATH: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TAG_long:!.*]] -// PATH: load i64, ptr %{{.*}}, align 8, !tbaa [[TAG_i64]] +// PATH: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TBAA12:![0-9]+]] +// PATH: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TBAA14:![0-9]+]] +// PATH: load i64, ptr %{{.*}}, align 8, !tbaa [[TBAA12]] *val = 5; *E = RED_AUTO_64; return *val; @@ -65,13 +66,13 @@ uint64_t g1(EnumAuto64 *E, uint64_t *val) { uint16_t g2(Enum16 *E, uint16_t *val) { // CHECK-LABEL: define{{.*}} i16 @g2( -// CHECK: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]] -// CHECK: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]] -// CHECK: load i16, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]] +// CHECK: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TBAA18:![0-9]+]] +// CHECK: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TBAA18]] +// CHECK: load i16, ptr %{{.*}}, align 2, !tbaa [[TBAA18]] // PATH-LABEL: define{{.*}} i16 @g2( -// PATH: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]] -// PATH: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]] -// 
PATH: load i16, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]] +// PATH: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TBAA18:![0-9]+]] +// PATH: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TBAA18]] +// PATH: load i16, ptr %{{.*}}, align 2, !tbaa [[TBAA18]] *val = 5; *E = RED_16; return *val; @@ -79,38 +80,40 @@ uint16_t g2(Enum16 *E, uint16_t *val) { uint8_t g3(Enum8 *E, uint8_t *val) { // CHECK-LABEL: define{{.*}} i8 @g3( -// CHECK: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TAG_i8:!.*]] -// CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]] -// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]] +// CHECK: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TBAA22:![0-9]+]] +// CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TBAA22]] +// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TBAA22]] // PATH-LABEL: define{{.*}} i8 @g3( -// PATH: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TAG_i8:!.*]] -// PATH: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]] -// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]] +// PATH: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TBAA22:![0-9]+]] +// PATH: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TBAA22]] +// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TBAA22]] *val = 5; *E = RED_8; return *val; } -// CHECK: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]], -// CHECK: [[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0} -// CHECK: [[TYPE_i32]] = !{!"int", [[TYPE_char]], -// CHECK: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0} -// CHECK: [[TYPE_i64]] = !{!"long long", [[TYPE_char]], -// CHECK: [[TAG_long]] = !{[[TYPE_long:!.*]], [[TYPE_long]], i64 0} -// CHECK: [[TYPE_long]] = !{!"long", [[TYPE_char]], -// CHECK: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0} -// CHECK: [[TYPE_i16]] = !{!"short", [[TYPE_char]], -// CHECK: [[TAG_i8]] = !{[[TYPE_i8:!.*]], [[TYPE_char]], i64 0} - -// PATH: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]], -// PATH: 
[[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"} -// PATH: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0} -// PATH: [[TYPE_i32]] = !{!"int", [[TYPE_char]], -// PATH: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0} -// PATH: [[TYPE_i64]] = !{!"long long", [[TYPE_char]], -// PATH: [[TAG_long]] = !{[[TYPE_long:!.*]], [[TYPE_long]], i64 0} -// PATH: [[TYPE_long]] = !{!"long", [[TYPE_char]], -// PATH: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0} -// PATH: [[TYPE_i16]] = !{!"short", [[TYPE_char]], -// PATH: [[TAG_i8]] = !{[[TYPE_i8:!.*]], [[TYPE_char]], i64 0} +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"long long", [[META4]], i64 0} +// CHECK: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// CHECK: [[META15]] = !{!"long", [[META4]], i64 0} +// CHECK: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// CHECK: [[META19]] = !{!"short", [[META4]], i64 0} +// CHECK: [[TBAA22]] = !{[[META4]], [[META4]], i64 0} +//. +// PATH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// PATH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// PATH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// PATH: [[META5]] = !{!"Simple C/C++ TBAA"} +// PATH: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// PATH: [[META13]] = !{!"long long", [[META4]], i64 0} +// PATH: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// PATH: [[META15]] = !{!"long", [[META4]], i64 0} +// PATH: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// PATH: [[META19]] = !{!"short", [[META4]], i64 0} +// PATH: [[TBAA22]] = !{[[META4]], [[META4]], i64 0} +//. 
diff --git a/clang/test/CodeGen/tbaa.cpp b/clang/test/CodeGen/tbaa.cpp index 3e92d1ea3df95..29c0c58432e06 100644 --- a/clang/test/CodeGen/tbaa.cpp +++ b/clang/test/CodeGen/tbaa.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -no-struct-path-tbaa -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=PATH,OLD-PATH // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -new-struct-path-tbaa -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=PATH,NEW-PATH @@ -51,11 +52,11 @@ typedef struct uint32_t g(uint32_t *s, StructA *A, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z1g -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2:![0-9]+]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z1g -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2:![0-9]+]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA12:![0-9]+]] *s = 1; A->f32 = 4; return *s; @@ -63,11 +64,11 @@ uint32_t g(uint32_t *s, StructA *A, uint64_t count) { uint32_t g2(uint32_t *s, StructA *A, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g2 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16:!.*]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA12:![0-9]+]] // PATH-LABEL: define{{.*}} i32 @_Z2g2 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// 
PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f16:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA15:![0-9]+]] *s = 1; A->f16 = 4; return *s; @@ -75,11 +76,11 @@ uint32_t g2(uint32_t *s, StructA *A, uint64_t count) { uint32_t g3(StructA *A, StructB *B, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g3 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z2g3 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA16:![0-9]+]] A->f32 = 1; B->a.f32 = 4; return A->f32; @@ -87,11 +88,11 @@ uint32_t g3(StructA *A, StructB *B, uint64_t count) { uint32_t g4(StructA *A, StructB *B, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g4 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] // PATH-LABEL: define{{.*}} i32 @_Z2g4 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]] -// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f16:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] +// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA18:![0-9]+]] A->f32 = 1; B->a.f16 = 4; return A->f32; @@ -99,11 +100,11 @@ uint32_t g4(StructA *A, StructB *B, uint64_t count) { uint32_t g5(StructA *A, StructB *B, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g5 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] 
-// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z2g5 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_f32:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA19:![0-9]+]] A->f32 = 1; B->f32 = 4; return A->f32; @@ -111,11 +112,11 @@ uint32_t g5(StructA *A, StructB *B, uint64_t count) { uint32_t g6(StructA *A, StructB *B, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g6 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z2g6 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32_2:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA20:![0-9]+]] A->f32 = 1; B->a.f32_2 = 4; return A->f32; @@ -123,11 +124,11 @@ uint32_t g6(StructA *A, StructB *B, uint64_t count) { uint32_t g7(StructA *A, StructS *S, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g7 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z2g7 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] +// PATH: store i32 4, ptr %{{.*}}, 
align 4, !tbaa [[TBAA21:![0-9]+]] A->f32 = 1; S->f32 = 4; return A->f32; @@ -135,11 +136,11 @@ uint32_t g7(StructA *A, StructS *S, uint64_t count) { uint32_t g8(StructA *A, StructS *S, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g8 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] // PATH-LABEL: define{{.*}} i32 @_Z2g8 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]] -// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f16:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA12]] +// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA23:![0-9]+]] A->f32 = 1; S->f16 = 4; return A->f32; @@ -147,11 +148,11 @@ uint32_t g8(StructA *A, StructS *S, uint64_t count) { uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z2g9 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z2g9 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S2_f32:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA21]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA24:![0-9]+]] S->f32 = 1; S2->f32 = 4; return S->f32; @@ -159,11 +160,11 @@ uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) { uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z3g10 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i16 4, 
ptr %{{.*}}, align 4, !tbaa [[TBAA12]] // PATH-LABEL: define{{.*}} i32 @_Z3g10 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]] -// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S2_f16:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA21]] +// PATH: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TBAA26:![0-9]+]] S->f32 = 1; S2->f16 = 4; return S->f32; @@ -171,11 +172,11 @@ uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) { uint32_t g11(StructC *C, StructD *D, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z3g11 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z3g11 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_C_b_a_f32:!.*]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_D_b_a_f32:!.*]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA27:![0-9]+]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA29:![0-9]+]] C->b.a.f32 = 1; D->b.a.f32 = 4; return C->b.a.f32; @@ -183,12 +184,12 @@ uint32_t g11(StructC *C, StructD *D, uint64_t count) { uint32_t g12(StructC *C, StructD *D, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z3g12 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // TODO: differentiate the two accesses. 
// PATH-LABEL: define{{.*}} i32 @_Z3g12 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA16]] +// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA16]] StructB *b1 = &(C->b); StructB *b2 = &(D->b); // b1, b2 have different context. @@ -208,9 +209,9 @@ struct five { char g13(struct five *a, struct five *b) { return a->b; // CHECK-LABEL: define{{.*}} signext i8 @_Z3g13 -// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_char:!.*]] +// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TBAA16:![0-9]+]] // PATH-LABEL: define{{.*}} signext i8 @_Z3g13 -// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_five_b:!.*]] +// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TBAA33:![0-9]+]] } struct six { @@ -221,9 +222,9 @@ struct six { }; char g14(struct six *a, struct six *b) { // CHECK-LABEL: define{{.*}} signext i8 @_Z3g14 -// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_char]] +// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TBAA16]] // PATH-LABEL: define{{.*}} signext i8 @_Z3g14 -// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_six_b:!.*]] +// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TBAA37:![0-9]+]] return a->b; } @@ -231,75 +232,78 @@ char g14(struct six *a, struct six *b) { typedef StructS StructS3; uint32_t g15(StructS *S, StructS3 *S3, uint64_t count) { // CHECK-LABEL: define{{.*}} i32 @_Z3g15 -// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] -// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] +// CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TBAA2]] // PATH-LABEL: define{{.*}} i32 @_Z3g15 -// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]] -// PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]] +// PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA21]] +// PATH: store i32 4, ptr %{{.*}}, align 4, 
!tbaa [[TBAA21]] S->f32 = 1; S3->f32 = 4; return S->f32; } -// CHECK: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_cxx_tbaa:!.*]], -// CHECK: [[TAG_cxx_tbaa]] = !{!"Simple C++ TBAA"} -// CHECK: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0} -// CHECK: [[TYPE_i32]] = !{!"int", [[TYPE_char]], -// CHECK: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0} -// CHECK: [[TYPE_i16]] = !{!"short", [[TYPE_char]], -// CHECK: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0} - -// OLD-PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", ! -// OLD-PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0} -// OLD-PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]] -// OLD-PATH: [[TAG_A_f32]] = !{[[TYPE_A:!.*]], [[TYPE_INT]], i64 4} -// OLD-PATH: [[TYPE_A]] = !{!"_ZTS7StructA", [[TYPE_SHORT:!.*]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_SHORT]], i64 8, [[TYPE_INT]], i64 12} -// OLD-PATH: [[TYPE_SHORT:!.*]] = !{!"short", [[TYPE_CHAR]] -// OLD-PATH: [[TAG_A_f16]] = !{[[TYPE_A]], [[TYPE_SHORT]], i64 0} -// OLD-PATH: [[TAG_B_a_f32]] = !{[[TYPE_B:!.*]], [[TYPE_INT]], i64 8} -// OLD-PATH: [[TYPE_B]] = !{!"_ZTS7StructB", [[TYPE_SHORT]], i64 0, [[TYPE_A]], i64 4, [[TYPE_INT]], i64 20} -// OLD-PATH: [[TAG_B_a_f16]] = !{[[TYPE_B]], [[TYPE_SHORT]], i64 4} -// OLD-PATH: [[TAG_B_f32]] = !{[[TYPE_B]], [[TYPE_INT]], i64 20} -// OLD-PATH: [[TAG_B_a_f32_2]] = !{[[TYPE_B]], [[TYPE_INT]], i64 16} -// OLD-PATH: [[TAG_S_f32]] = !{[[TYPE_S:!.*]], [[TYPE_INT]], i64 4} -// OLD-PATH: [[TYPE_S]] = !{!"_ZTS7StructS", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4} -// OLD-PATH: [[TAG_S_f16]] = !{[[TYPE_S]], [[TYPE_SHORT]], i64 0} -// OLD-PATH: [[TAG_S2_f32]] = !{[[TYPE_S2:!.*]], [[TYPE_INT]], i64 4} -// OLD-PATH: [[TYPE_S2]] = !{!"_ZTS8StructS2", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4} -// OLD-PATH: [[TAG_S2_f16]] = !{[[TYPE_S2]], [[TYPE_SHORT]], i64 0} -// OLD-PATH: [[TAG_C_b_a_f32]] = !{[[TYPE_C:!.*]], [[TYPE_INT]], i64 12} -// OLD-PATH: [[TYPE_C]] = !{!"_ZTS7StructC", [[TYPE_SHORT]], i64 0, 
[[TYPE_B]], i64 4, [[TYPE_INT]], i64 28} -// OLD-PATH: [[TAG_D_b_a_f32]] = !{[[TYPE_D:!.*]], [[TYPE_INT]], i64 12} -// OLD-PATH: [[TYPE_D]] = !{!"_ZTS7StructD", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28, [[TYPE_CHAR]], i64 32} -// OLD-PATH: [[TAG_five_b]] = !{[[TYPE_five:!.*]], [[TYPE_CHAR]], i64 1} -// OLD-PATH: [[TYPE_five]] = !{!"_ZTS4five", [[TYPE_CHAR]], i64 0, [[TYPE_CHAR]], i64 1, [[TYPE_CHAR]], i64 2} -// OLD-PATH: [[TAG_six_b]] = !{[[TYPE_six:!.*]], [[TYPE_CHAR]], i64 4} -// OLD-PATH: [[TYPE_six]] = !{!"_ZTS3six", [[TYPE_CHAR]], i64 0, [[TYPE_CHAR]], i64 4, [[TYPE_CHAR]], i64 5} - -// NEW-PATH-DAG: [[ROOT:!.*]] = !{!"Simple C++ TBAA"} -// NEW-PATH-DAG: [[TYPE_char:!.*]] = !{[[ROOT]], i64 1, !"omnipotent char"} -// NEW-PATH-DAG: [[TYPE_short:!.*]] = !{[[TYPE_char]], i64 2, !"short"} -// NEW-PATH-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"} -// NEW-PATH-DAG: [[TAG_i32:!.*]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4} -// NEW-PATH-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS7StructA", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4, [[TYPE_short]], i64 8, i64 2, [[TYPE_int]], i64 12, i64 4} -// NEW-PATH-DAG: [[TAG_A_f16]] = !{[[TYPE_A]], [[TYPE_short]], i64 0, i64 2} -// NEW-PATH-DAG: [[TAG_A_f32]] = !{[[TYPE_A]], [[TYPE_int]], i64 4, i64 4} -// NEW-PATH-DAG: [[TYPE_B:!.*]] = !{[[TYPE_char]], i64 24, !"_ZTS7StructB", [[TYPE_short]], i64 0, i64 2, [[TYPE_A]], i64 4, i64 16, [[TYPE_int]], i64 20, i64 4} -// NEW-PATH-DAG: [[TAG_B_a_f16]] = !{[[TYPE_B]], [[TYPE_short]], i64 4, i64 2} -// NEW-PATH-DAG: [[TAG_B_a_f32]] = !{[[TYPE_B]], [[TYPE_int]], i64 8, i64 4} -// NEW-PATH-DAG: [[TAG_B_f32]] = !{[[TYPE_B]], [[TYPE_int]], i64 20, i64 4} -// NEW-PATH-DAG: [[TAG_B_a_f32_2]] = !{[[TYPE_B]], [[TYPE_int]], i64 16, i64 4} -// NEW-PATH-DAG: [[TYPE_S:!.*]] = !{[[TYPE_char]], i64 8, !"_ZTS7StructS", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4} -// NEW-PATH-DAG: [[TAG_S_f16]] = !{[[TYPE_S]], [[TYPE_short]], 
i64 0, i64 2} -// NEW-PATH-DAG: [[TAG_S_f32]] = !{[[TYPE_S]], [[TYPE_int]], i64 4, i64 4} -// NEW-PATH-DAG: [[TYPE_S2:!.*]] = !{[[TYPE_char]], i64 8, !"_ZTS8StructS2", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4} -// NEW-PATH-DAG: [[TAG_S2_f16]] = !{[[TYPE_S2]], [[TYPE_short]], i64 0, i64 2} -// NEW-PATH-DAG: [[TAG_S2_f32]] = !{[[TYPE_S2]], [[TYPE_int]], i64 4, i64 4} -// NEW-PATH-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 32, !"_ZTS7StructC", [[TYPE_short]], i64 0, i64 2, [[TYPE_B]], i64 4, i64 24, [[TYPE_int]], i64 28, i64 4} -// NEW-PATH-DAG: [[TAG_C_b_a_f32]] = !{[[TYPE_C]], [[TYPE_int]], i64 12, i64 4} -// NEW-PATH-DAG: [[TYPE_D:!.*]] = !{[[TYPE_char]], i64 36, !"_ZTS7StructD", [[TYPE_short]], i64 0, i64 2, [[TYPE_B]], i64 4, i64 24, [[TYPE_int]], i64 28, i64 4, [[TYPE_char]], i64 32, i64 1} -// NEW-PATH-DAG: [[TAG_D_b_a_f32]] = !{[[TYPE_D]], [[TYPE_int]], i64 12, i64 4} -// NEW-PATH-DAG: [[TYPE_five:!.*]] = !{[[TYPE_char]], i64 3, !"_ZTS4five", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1} -// NEW-PATH-DAG: [[TAG_five_b]] = !{[[TYPE_five]], [[TYPE_char]], i64 1, i64 1} -// NEW-PATH-DAG: [[TYPE_six:!.*]] = !{[[TYPE_char]], i64 6, !"_ZTS3six", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 4, i64 1, [[TYPE_char]], i64 5, i64 1} -// NEW-PATH-DAG: [[TAG_six_b]] = !{[[TYPE_six]], [[TYPE_char]], i64 4, i64 1} +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"short", [[META4]], i64 0} +// CHECK: [[TBAA16]] = !{[[META4]], [[META4]], i64 0} +//. 
+// OLD-PATH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// OLD-PATH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// OLD-PATH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// OLD-PATH: [[META5]] = !{!"Simple C++ TBAA"} +// OLD-PATH: [[TBAA12]] = !{[[META13:![0-9]+]], [[META3]], i64 4} +// OLD-PATH: [[META13]] = !{!"_ZTS7StructA", [[META14:![0-9]+]], i64 0, [[META3]], i64 4, [[META14]], i64 8, [[META3]], i64 12} +// OLD-PATH: [[META14]] = !{!"short", [[META4]], i64 0} +// OLD-PATH: [[TBAA15]] = !{[[META13]], [[META14]], i64 0} +// OLD-PATH: [[TBAA16]] = !{[[META17:![0-9]+]], [[META3]], i64 8} +// OLD-PATH: [[META17]] = !{!"_ZTS7StructB", [[META14]], i64 0, [[META13]], i64 4, [[META3]], i64 20} +// OLD-PATH: [[TBAA18]] = !{[[META17]], [[META14]], i64 4} +// OLD-PATH: [[TBAA19]] = !{[[META17]], [[META3]], i64 20} +// OLD-PATH: [[TBAA20]] = !{[[META17]], [[META3]], i64 16} +// OLD-PATH: [[TBAA21]] = !{[[META22:![0-9]+]], [[META3]], i64 4} +// OLD-PATH: [[META22]] = !{!"_ZTS7StructS", [[META14]], i64 0, [[META3]], i64 4} +// OLD-PATH: [[TBAA23]] = !{[[META22]], [[META14]], i64 0} +// OLD-PATH: [[TBAA24]] = !{[[META25:![0-9]+]], [[META3]], i64 4} +// OLD-PATH: [[META25]] = !{!"_ZTS8StructS2", [[META14]], i64 0, [[META3]], i64 4} +// OLD-PATH: [[TBAA26]] = !{[[META25]], [[META14]], i64 0} +// OLD-PATH: [[TBAA27]] = !{[[META28:![0-9]+]], [[META3]], i64 12} +// OLD-PATH: [[META28]] = !{!"_ZTS7StructC", [[META14]], i64 0, [[META17]], i64 4, [[META3]], i64 28} +// OLD-PATH: [[TBAA29]] = !{[[META30:![0-9]+]], [[META3]], i64 12} +// OLD-PATH: [[META30]] = !{!"_ZTS7StructD", [[META14]], i64 0, [[META17]], i64 4, [[META3]], i64 28, [[META4]], i64 32} +// OLD-PATH: [[TBAA33]] = !{[[META34:![0-9]+]], [[META4]], i64 1} +// OLD-PATH: [[META34]] = !{!"_ZTS4five", [[META4]], i64 0, [[META4]], i64 1, [[META4]], i64 2} +// OLD-PATH: [[TBAA37]] = !{[[META38:![0-9]+]], [[META4]], i64 4} +// OLD-PATH: [[META38]] = !{!"_ZTS3six", [[META4]], i64 0, [[META4]], i64 
4, [[META4]], i64 5} +//. +// NEW-PATH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0, i64 4} +// NEW-PATH: [[META3]] = !{[[META4:![0-9]+]], i64 4, !"int"} +// NEW-PATH: [[META4]] = !{[[META5:![0-9]+]], i64 1, !"omnipotent char"} +// NEW-PATH: [[META5]] = !{!"Simple C++ TBAA"} +// NEW-PATH: [[TBAA12]] = !{[[META13:![0-9]+]], [[META3]], i64 4, i64 4} +// NEW-PATH: [[META13]] = !{[[META4]], i64 16, !"_ZTS7StructA", [[META14:![0-9]+]], i64 0, i64 2, [[META3]], i64 4, i64 4, [[META14]], i64 8, i64 2, [[META3]], i64 12, i64 4} +// NEW-PATH: [[META14]] = !{[[META4]], i64 2, !"short"} +// NEW-PATH: [[TBAA15]] = !{[[META13]], [[META14]], i64 0, i64 2} +// NEW-PATH: [[TBAA16]] = !{[[META17:![0-9]+]], [[META3]], i64 8, i64 4} +// NEW-PATH: [[META17]] = !{[[META4]], i64 24, !"_ZTS7StructB", [[META14]], i64 0, i64 2, [[META13]], i64 4, i64 16, [[META3]], i64 20, i64 4} +// NEW-PATH: [[TBAA18]] = !{[[META17]], [[META14]], i64 4, i64 2} +// NEW-PATH: [[TBAA19]] = !{[[META17]], [[META3]], i64 20, i64 4} +// NEW-PATH: [[TBAA20]] = !{[[META17]], [[META3]], i64 16, i64 4} +// NEW-PATH: [[TBAA21]] = !{[[META22:![0-9]+]], [[META3]], i64 4, i64 4} +// NEW-PATH: [[META22]] = !{[[META4]], i64 8, !"_ZTS7StructS", [[META14]], i64 0, i64 2, [[META3]], i64 4, i64 4} +// NEW-PATH: [[TBAA23]] = !{[[META22]], [[META14]], i64 0, i64 2} +// NEW-PATH: [[TBAA24]] = !{[[META25:![0-9]+]], [[META3]], i64 4, i64 4} +// NEW-PATH: [[META25]] = !{[[META4]], i64 8, !"_ZTS8StructS2", [[META14]], i64 0, i64 2, [[META3]], i64 4, i64 4} +// NEW-PATH: [[TBAA26]] = !{[[META25]], [[META14]], i64 0, i64 2} +// NEW-PATH: [[TBAA27]] = !{[[META28:![0-9]+]], [[META3]], i64 12, i64 4} +// NEW-PATH: [[META28]] = !{[[META4]], i64 32, !"_ZTS7StructC", [[META14]], i64 0, i64 2, [[META17]], i64 4, i64 24, [[META3]], i64 28, i64 4} +// NEW-PATH: [[TBAA29]] = !{[[META30:![0-9]+]], [[META3]], i64 12, i64 4} +// NEW-PATH: [[META30]] = !{[[META4]], i64 36, !"_ZTS7StructD", [[META14]], i64 0, i64 2, [[META17]], i64 4, i64 24, 
[[META3]], i64 28, i64 4, [[META4]], i64 32, i64 1} +// NEW-PATH: [[TBAA33]] = !{[[META34:![0-9]+]], [[META4]], i64 1, i64 1} +// NEW-PATH: [[META34]] = !{[[META4]], i64 3, !"_ZTS4five", [[META4]], i64 0, i64 1, [[META4]], i64 1, i64 1, [[META4]], i64 2, i64 1} +// NEW-PATH: [[TBAA37]] = !{[[META38:![0-9]+]], [[META4]], i64 4, i64 1} +// NEW-PATH: [[META38]] = !{[[META4]], i64 6, !"_ZTS3six", [[META4]], i64 0, i64 1, [[META4]], i64 4, i64 1, [[META4]], i64 5, i64 1} +//. diff --git a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp index 8969e12f8f797..2af292961a331 100644 --- a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp +++ b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp @@ -10,7 +10,7 @@ extern bool B(); // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2:![0-9]+]], !range [[RNG6:![0-9]+]], !noundef [[META7:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6:![0-9]+]], !range [[RNG8:![0-9]+]], !noundef [[META9:![0-9]+]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 true) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] @@ -38,7 +38,7 @@ bool f() { // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) 
// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] @@ -67,7 +67,7 @@ bool g() { // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] @@ -93,14 +93,14 @@ bool h() { // CHECK-LABEL: define dso_local void @_Z8NullStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] // CHECK: [[IF_THEN]]: // CHECK-NEXT: br label %[[IF_END:.*]] // CHECK: [[IF_ELSE]]: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: // CHECK-NEXT: ret void @@ -117,7 +117,7 @@ void NullStmt() { // CHECK-LABEL: define dso_local void @_Z6IfStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa 
[[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END2:.*]] @@ -129,7 +129,7 @@ void NullStmt() { // CHECK: [[IF_END]]: // CHECK-NEXT: br label %[[IF_END2]] // CHECK: [[IF_END2]]: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV3:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK-NEXT: br i1 [[LOADEDV3]], label %[[IF_THEN4:.*]], label %[[IF_END8:.*]] // CHECK: [[IF_THEN4]]: @@ -137,7 +137,7 @@ void NullStmt() { // CHECK-NEXT: [[CALL5_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL5]], i1 false) // CHECK-NEXT: br i1 [[CALL5_EXPVAL]], label %[[IF_THEN6:.*]], label %[[IF_END7:.*]] // CHECK: [[IF_THEN6]]: -// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] // CHECK-NEXT: br label %[[IF_END7]] // CHECK: [[IF_END7]]: // CHECK-NEXT: br label %[[IF_END8]] @@ -157,7 +157,7 @@ void IfStmt() { // CHECK-LABEL: define dso_local void @_Z9WhileStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] @@ -167,11 +167,11 @@ void IfStmt() { // CHECK-NEXT: [[CALL:%.*]] = call 
noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: br i1 [[CALL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]] // CHECK: [[WHILE_BODY]]: -// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK: [[WHILE_END]]: // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]] // CHECK: [[IF_THEN2]]: @@ -181,8 +181,8 @@ void IfStmt() { // CHECK-NEXT: [[CALL4_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL4]], i1 false) // CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label %[[WHILE_BODY5:.*]], label %[[WHILE_END6:.*]] // CHECK: [[WHILE_BODY5]]: -// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] -// CHECK-NEXT: br label %[[WHILE_COND3]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] +// CHECK-NEXT: br label %[[WHILE_COND3]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK: [[WHILE_END6]]: // CHECK-NEXT: br label %[[IF_END7]] // CHECK: [[IF_END7]]: @@ -200,7 +200,7 @@ void WhileStmt() { // CHECK-LABEL: define dso_local void @_Z6DoStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label 
%[[IF_THEN:.*]], label %[[IF_END:.*]] @@ -210,11 +210,11 @@ void WhileStmt() { // CHECK-NEXT: br label %[[DO_COND:.*]] // CHECK: [[DO_COND]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL]], label %[[DO_BODY]], label %[[DO_END:.*]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK-NEXT: br i1 [[CALL]], label %[[DO_BODY]], label %[[DO_END:.*]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK: [[DO_END]]: // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]] // CHECK: [[IF_THEN2]]: @@ -223,7 +223,7 @@ void WhileStmt() { // CHECK-NEXT: br label %[[DO_COND4:.*]] // CHECK: [[DO_COND4]]: // CHECK-NEXT: [[CALL5:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL5]], label %[[DO_BODY3]], label %[[DO_END6:.*]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-NEXT: br i1 [[CALL5]], label %[[DO_BODY3]], label %[[DO_END6:.*]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK: [[DO_END6]]: // CHECK-NEXT: br label %[[IF_END7]] // CHECK: [[IF_END7]]: @@ -244,7 +244,7 @@ void DoStmt() { // CHECK-LABEL: define dso_local void @_Z7ForStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] @@ 
-254,11 +254,11 @@ void DoStmt() { // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: br i1 [[CALL]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK: [[FOR_BODY]]: -// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK: [[FOR_END]]: // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]] // CHECK: [[IF_THEN2]]: @@ -268,7 +268,7 @@ void DoStmt() { // CHECK-NEXT: [[CALL4_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL4]], i1 false) // CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label %[[FOR_BODY5:.*]], label %[[FOR_END6:.*]] // CHECK: [[FOR_BODY5]]: -// CHECK-NEXT: br label %[[FOR_COND3]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK-NEXT: br label %[[FOR_COND3]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK: [[FOR_END6]]: // CHECK-NEXT: br label %[[IF_END7]] // CHECK: [[IF_END7]]: @@ -286,14 +286,14 @@ void ForStmt() { // CHECK-LABEL: define dso_local void @_Z8GotoStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] // CHECK: [[IF_THEN]]: // CHECK-NEXT: br label %[[END:.*]] // CHECK: 
[[IF_ELSE]]: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] // CHECK-NEXT: br label %[[IF_END:.*]] // CHECK: [[IF_END]]: // CHECK-NEXT: br label %[[END]] @@ -313,14 +313,14 @@ end:; // CHECK-LABEL: define dso_local void @_Z10ReturnStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] // CHECK: [[IF_THEN]]: // CHECK-NEXT: br label %[[IF_END:.*]] // CHECK: [[IF_ELSE]]: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: // CHECK-NEXT: ret void @@ -337,31 +337,31 @@ void ReturnStmt() { // CHECK-LABEL: define dso_local void @_Z10SwitchStmtv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) // CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] // CHECK: [[IF_THEN]]: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA16:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile 
i32, ptr @i, align 4, !tbaa [[INT_TBAA2:![0-9]+]] // CHECK-NEXT: switch i32 [[TMP1]], label %[[SW_EPILOG:.*]] [ // CHECK-NEXT: ] // CHECK: [[SW_EPILOG]]: // CHECK-NEXT: br label %[[IF_END:.*]] // CHECK: [[IF_ELSE]]: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: -// CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA6]], !range [[RNG8]], !noundef [[META9]] // CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_ELSE4:.*]] // CHECK: [[IF_THEN2]]: -// CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA16]] +// CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: switch i32 [[TMP3]], label %[[SW_EPILOG3:.*]] [ // CHECK-NEXT: ] // CHECK: [[SW_EPILOG3]]: // CHECK-NEXT: br label %[[IF_END5:.*]] // CHECK: [[IF_ELSE4]]: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA6]] // CHECK-NEXT: br label %[[IF_END5]] // CHECK: [[IF_END5]]: // CHECK-NEXT: ret void @@ -383,20 +383,20 @@ void SwitchStmt() { } //. 
-// CHECK: [[BOOL_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"bool", [[META4:![0-9]+]], i64 0} +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C++ TBAA"} -// CHECK: [[RNG6]] = !{i8 0, i8 2} -// CHECK: [[META7]] = !{} -// CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]} -// CHECK: [[META9]] = !{!"llvm.loop.mustprogress"} -// CHECK: [[META10]] = !{!"llvm.loop.unroll.disable"} -// CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META9]], [[META10]]} -// CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]} -// CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META9]], [[META10]]} -// CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META9]], [[META10]]} -// CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META9]], [[META10]]} -// CHECK: [[INT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} -// CHECK: [[META17]] = !{!"int", [[META4]], i64 0} +// CHECK: [[BOOL_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"bool", [[META4]], i64 0} +// CHECK: [[RNG8]] = !{i8 0, i8 2} +// CHECK: [[META9]] = !{} +// CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]], [[META12:![0-9]+]]} +// CHECK: [[META11]] = !{!"llvm.loop.mustprogress"} +// CHECK: [[META12]] = !{!"llvm.loop.unroll.disable"} +// CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META11]], [[META12]]} +// CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META11]], [[META12]]} +// CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META11]], [[META12]]} +// CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META11]], [[META12]]} +// CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META11]], [[META12]]} //. 
diff --git a/clang/test/CodeGenCXX/builtin-get-vtable-pointer.cpp b/clang/test/CodeGenCXX/builtin-get-vtable-pointer.cpp index 604fb6c5585ac..0bde63496f4cb 100644 --- a/clang/test/CodeGenCXX/builtin-get-vtable-pointer.cpp +++ b/clang/test/CodeGenCXX/builtin-get-vtable-pointer.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -x c++ -std=c++23 -triple x86_64-apple-darwin10 -emit-llvm -O1 -disable-llvm-passes -no-enable-noundef-analysis -o - | FileCheck --check-prefix=CHECK-NOAUTH %s // RUN: %clang_cc1 %s -x c++ -std=c++23 -triple arm64-apple-ios -fptrauth-calls -fptrauth-vtable-pointer-type-discrimination -emit-llvm -O1 -disable-llvm-passes -no-enable-noundef-analysis -o - | FileCheck --check-prefix=CHECK-TYPEAUTH %s // RUN: %clang_cc1 %s -x c++ -std=c++23 -triple arm64-apple-ios -fptrauth-calls -fptrauth-vtable-pointer-address-discrimination -emit-llvm -O1 -disable-llvm-passes -no-enable-noundef-analysis -o - | FileCheck --check-prefix=CHECK-ADDRESSAUTH %s @@ -35,309 +36,1290 @@ template struct same_type { static const bool value = true; }; +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test11aEPNS_1AE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9:![0-9]+]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test11aEPNS_1AE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] +// 
CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9:![0-9]+]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test11aEPNS_1AE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9:![0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test11aEPNS_1AE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], 
align 8, !tbaa [[TBAA9:![0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *a(A *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test11aEPNS_1AE(ptr %o) #0 { - // CHECK-TYPEAUTH: define ptr @_ZN5test11aEPNS_1AE(ptr %o) #0 { return __builtin_get_vtable_pointer(o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %0 = load ptr, ptr %o.addr, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 %1) - // CHECK-ADDRESSAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-ADDRESSAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-BOTHAUTH: [[T1:%.*]] = ptrtoint ptr %0 to i64 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T1]], i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test11bEPNS_1BE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// 
CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test11bEPNS_1BE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test11bEPNS_1BE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 
[[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test11bEPNS_1BE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *b(B *o) { static_assert(same_type::value); - // CHECK-TYPEAUTH: define ptr @_ZN5test11bEPNS_1BE(ptr %o) #0 { - // CHECK-NOAUTH: define ptr @_ZN5test11bEPNS_1BE(ptr %o) #0 { return __builtin_get_vtable_pointer(o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 %1) - // CHECK-ADDRESSAUTH: %4 
= inttoptr i64 %3 to ptr - // CHECK-ADDRESSAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 %1, i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test16b_as_AEPNS_1BE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test16b_as_AEPNS_1BE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test16b_as_AEPNS_1BE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// 
CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test16b_as_AEPNS_1BE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *b_as_A(B *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test16b_as_AEPNS_1BE(ptr %o) #0 { return __builtin_get_vtable_pointer((A *)o); - // 
CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 %1) - // CHECK-ADDRESSAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-ADDRESSAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 %1, i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test11cEPNS_1CE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test11cEPNS_1CE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// 
CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test11cEPNS_1CE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test11cEPNS_1CE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// 
CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *c(C *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test11cEPNS_1CE(ptr %o) #0 { return __builtin_get_vtable_pointer(o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 %1) - // CHECK-ADDRESSAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-ADDRESSAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 %1, i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test16c_as_ZEPNS_1CE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: 
ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test16c_as_ZEPNS_1CE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test16c_as_ZEPNS_1CE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test16c_as_ZEPNS_1CE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// 
CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *c_as_Z(C *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test16c_as_ZEPNS_1CE(ptr %o) #0 { return __builtin_get_vtable_pointer((Z *)o); - // CHECK-NOAUTH: %0 = load ptr, ptr %o.addr, align 8 - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 %1) - // CHECK-ADDRESSAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-ADDRESSAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 %1, i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // 
CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test16c_as_BEPNS_1CE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-NOAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL]]: +// CHECK-NOAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-NOAUTH-NEXT: br label %[[CAST_END]] +// CHECK-NOAUTH: [[CAST_END]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test16c_as_BEPNS_1CE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL]]: +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END]] +// CHECK-TYPEAUTH: [[CAST_END]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr 
[[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test16c_as_BEPNS_1CE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL]]: +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END]] +// CHECK-ADDRESSAUTH: [[CAST_END]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[CAST_RESULT]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP5]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test16c_as_BEPNS_1CE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) 
#[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA13]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL]]: +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END]] +// CHECK-BOTHAUTH: [[CAST_END]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[CAST_RESULT]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP2]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP4]], i32 2, i64 [[TMP3]]) +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP7:%.*]] = load volatile i8, ptr [[TMP6]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP6]] +// const void *c_as_B(C *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test16c_as_BEPNS_1CE(ptr %o) #0 { return __builtin_get_vtable_pointer((B *)o); - // CHECK-NOAUTH: %add.ptr = getelementptr inbounds i8, ptr %0, i64 8 - // CHECK-NOAUTH: br label %cast.end - // CHECK-NOAUTH: %cast.result = phi ptr [ %add.ptr, %cast.notnull ], [ null, %entry ] - // CHECK-NOAUTH: %vtable = load ptr, ptr %cast.result, align 8 - // CHECK-TYPEAUTH: %cast.result = phi ptr [ %add.ptr, %cast.notnull ], [ null, %entry ] - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %cast.result, align 
8 - // CHECK-TYPEAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 48388) - // CHECK-TYPEAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-TYPEAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %cast.result to i64 - // CHECK-ADDRESSAUTH: %3 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %4 = call i64 @llvm.ptrauth.auth(i64 %3, i32 2, i64 %2) - // CHECK-ADDRESSAUTH: %5 = inttoptr i64 %4 to ptr - // CHECK-ADDRESSAUTH: %6 = load volatile i8, ptr %5, align 8 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 %2, i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test11dEPNS_1DE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test11dEPNS_1DE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr 
[[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test11dEPNS_1DE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test11dEPNS_1DE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 
+// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *d(D *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test11dEPNS_1DE(ptr %o) #0 { return __builtin_get_vtable_pointer(o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: %1 = ptrtoint ptr %0 to i64 - // CHECK-ADDRESSAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 %1) - // CHECK-ADDRESSAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-ADDRESSAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-BOTHAUTH: [[T1:%.*]] = ptrtoint ptr %0 to i64 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T1]], i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test16d_as_AEPNS_1DE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-NOAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// 
CHECK-NOAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL]]: +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 -32 +// CHECK-NOAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-NOAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[VBASE_OFFSET]] +// CHECK-NOAUTH-NEXT: br label %[[CAST_END]] +// CHECK-NOAUTH: [[CAST_END]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-NOAUTH-NEXT: [[VTABLE1:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE1]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test16d_as_AEPNS_1DE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL]]: +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-TYPEAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[TMP4]], i64 -32 +// CHECK-TYPEAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds 
i8, ptr [[TMP0]], i64 [[VBASE_OFFSET]] +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END]] +// CHECK-TYPEAUTH: [[CAST_END]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-TYPEAUTH-NEXT: [[VTABLE1:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[VTABLE1]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP6:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP5]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP8:%.*]] = load volatile i8, ptr [[TMP7]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP7]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test16d_as_AEPNS_1DE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL]]: +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -32 +// CHECK-ADDRESSAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, 
ptr [[TMP0]], i64 [[VBASE_OFFSET]] +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END]] +// CHECK-ADDRESSAUTH: [[CAST_END]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE1:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[CAST_RESULT]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VTABLE1]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP8:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP7]], i32 2, i64 [[TMP6]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP10:%.*]] = load volatile i8, ptr [[TMP9]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP9]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test16d_as_AEPNS_1DE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA15]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL]]: +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP2]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP4]], i32 2, i64 [[TMP3]]) +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +// CHECK-BOTHAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[TMP6]], i64 -32 +// 
CHECK-BOTHAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[VBASE_OFFSET]] +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END]] +// CHECK-BOTHAUTH: [[CAST_END]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-BOTHAUTH-NEXT: [[VTABLE1:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[CAST_RESULT]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP8:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP7]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[VTABLE1]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP10:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP9]], i32 2, i64 [[TMP8]]) +// CHECK-BOTHAUTH-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP12:%.*]] = load volatile i8, ptr [[TMP11]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP11]] +// const void *d_as_A(D *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test16d_as_AEPNS_1DE(ptr %o) #0 { return __builtin_get_vtable_pointer((A *)o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-NOAUTH: %vbase.offset.ptr = getelementptr i8, ptr %vtable, i64 -32 - // CHECK-NOAUTH: %vbase.offset = load i64, ptr %vbase.offset.ptr, align 8 - // CHECK-NOAUTH: %add.ptr = getelementptr inbounds i8, ptr %0, i64 %vbase.offset - // CHECK-NOAUTH: %cast.result = phi ptr [ %add.ptr, %cast.notnull ], [ null, %entry ] - // CHECK-NOAUTH: %vtable1 = load ptr, ptr %cast.result, align 8 - // CHECK-TYPEAUTH: %vtable1 = load ptr, ptr %cast.result, align 8 - // CHECK-TYPEAUTH: %5 = ptrtoint ptr %vtable1 to i64 - // CHECK-TYPEAUTH: %6 = call i64 @llvm.ptrauth.auth(i64 %5, i32 2, i64 48388) - // CHECK-TYPEAUTH: %7 = inttoptr i64 %6 to ptr - // CHECK-TYPEAUTH: %8 = load volatile i8, ptr %7, align 8 - // CHECK-ADDRESSAUTH: %6 = 
ptrtoint ptr %cast.result to i64 - // CHECK-ADDRESSAUTH: %7 = ptrtoint ptr %vtable1 to i64 - // CHECK-ADDRESSAUTH: %8 = call i64 @llvm.ptrauth.auth(i64 %7, i32 2, i64 %6) - // CHECK-ADDRESSAUTH: %9 = inttoptr i64 %8 to ptr - // CHECK-ADDRESSAUTH: %10 = load volatile i8, ptr %9, align 8 - // CHECK-BOTHAUTH: [[T1:%.*]] = ptrtoint ptr %cast.result to i64 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T1]], i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable1 to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test11eEPNS_1EE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test11eEPNS_1EE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// 
CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test11eEPNS_1EE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test11eEPNS_1EE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// 
CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *e(E *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test11eEPNS_1EE(ptr %o) #0 { return __builtin_get_vtable_pointer(o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: [[T1:%.*]] = ptrtoint ptr %0 to i64 - // CHECK-ADDRESSAUTH: [[T2:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: [[T3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T2]], i32 2, i64 [[T1]]) - // CHECK-ADDRESSAUTH: [[T4:%.*]] = inttoptr i64 [[T3]] to ptr - // CHECK-ADDRESSAUTH: [[T5:%.*]] = load volatile i8, ptr [[T4]], align 8 - // CHECK-BOTHAUTH: [[T1:%.*]] = ptrtoint ptr %0 to i64 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T1]], i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test16e_as_BEPNS_1EE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-NOAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-NOAUTH: 
[[CAST_NOTNULL]]: +// CHECK-NOAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-NOAUTH-NEXT: br label %[[CAST_END]] +// CHECK-NOAUTH: [[CAST_END]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test16e_as_BEPNS_1EE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL]]: +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END]] +// CHECK-TYPEAUTH: [[CAST_END]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test16e_as_BEPNS_1EE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL]]: +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END]] +// CHECK-ADDRESSAUTH: [[CAST_END]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[CAST_RESULT]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP5]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test16e_as_BEPNS_1EE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL]]: +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END]] +// 
CHECK-BOTHAUTH: [[CAST_END]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CAST_RESULT]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[CAST_RESULT]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP2]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP4]], i32 2, i64 [[TMP3]]) +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP7:%.*]] = load volatile i8, ptr [[TMP6]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP6]] +// const void *e_as_B(E *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test16e_as_BEPNS_1EE(ptr %o) #0 { return __builtin_get_vtable_pointer((B *)o); - // CHECK-NOAUTH: %add.ptr = getelementptr inbounds i8, ptr %0, i64 8 - // CHECK-NOAUTH: %cast.result = phi ptr [ %add.ptr, %cast.notnull ], [ null, %entry ] - // CHECK-NOAUTH: %vtable = load ptr, ptr %cast.result, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %cast.result, align 8 - // CHECK-TYPEAUTH: %2 = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: %3 = call i64 @llvm.ptrauth.auth(i64 %2, i32 2, i64 48388) - // CHECK-TYPEAUTH: %4 = inttoptr i64 %3 to ptr - // CHECK-TYPEAUTH: %5 = load volatile i8, ptr %4, align 8 - // CHECK-ADDRESSAUTH: [[T1:%.*]] = ptrtoint ptr %cast.result to i64 - // CHECK-ADDRESSAUTH: [[T2:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: [[T3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T2]], i32 2, i64 [[T1]]) - // CHECK-ADDRESSAUTH: [[T4:%.*]] = inttoptr i64 [[T3]] to ptr - // CHECK-ADDRESSAUTH: [[T5:%.*]] = load volatile i8, ptr [[T4]], align 8 - // CHECK-BOTHAUTH: [[T1:%.*]] = ptrtoint ptr %cast.result to i64 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T1]], 
i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @_ZN5test16e_as_DEPNS_1EE( +// CHECK-NOAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @_ZN5test16e_as_DEPNS_1EE( +// CHECK-TYPEAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @_ZN5test16e_as_DEPNS_1EE( +// CHECK-ADDRESSAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// 
CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @_ZN5test16e_as_DEPNS_1EE( +// CHECK-BOTHAUTH-SAME: ptr [[O:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// const void *e_as_D(E *o) { static_assert(same_type::value); - // CHECK-NOAUTH: define ptr @_ZN5test16e_as_DEPNS_1EE(ptr %o) #0 { return __builtin_get_vtable_pointer((D *)o); - // CHECK-NOAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %0, align 8 - // CHECK-TYPEAUTH: %1 = ptrtoint ptr %vtable to i64 - 
// CHECK-TYPEAUTH: %2 = call i64 @llvm.ptrauth.auth(i64 %1, i32 2, i64 48388) - // CHECK-TYPEAUTH: %3 = inttoptr i64 %2 to ptr - // CHECK-TYPEAUTH: %4 = load volatile i8, ptr %3, align 8 - // CHECK-ADDRESSAUTH: [[T1:%.*]] = ptrtoint ptr %0 to i64 - // CHECK-ADDRESSAUTH: [[T2:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: [[T3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T2]], i32 2, i64 [[T1]]) - // CHECK-ADDRESSAUTH: [[T4:%.*]] = inttoptr i64 [[T3]] to ptr - // CHECK-ADDRESSAUTH: [[T5:%.*]] = load volatile i8, ptr [[T4]], align 8 - // CHECK-BOTHAUTH: [[T1:%.*]] = ptrtoint ptr %0 to i64 - // CHECK-BOTHAUTH: [[T2:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T1]], i64 48388) - // CHECK-BOTHAUTH: [[T3:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[T4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T3]], i32 2, i64 [[T2]]) - // CHECK-BOTHAUTH: [[T5:%.*]] = inttoptr i64 [[T4]] to ptr - // CHECK-BOTHAUTH: [[T6:%.*]] = load volatile i8, ptr [[T5]], align 8 } +// CHECK-NOAUTH-LABEL: define ptr @aArrayParameter( +// CHECK-NOAUTH-SAME: ptr [[AARRAY:%.*]]) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[AARRAY_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOAUTH-NEXT: store ptr [[AARRAY]], ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @aArrayParameter( +// CHECK-TYPEAUTH-SAME: ptr [[AARRAY:%.*]]) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[AARRAY_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-TYPEAUTH-NEXT: store ptr [[AARRAY]], ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa 
[[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @aArrayParameter( +// CHECK-ADDRESSAUTH-SAME: ptr [[AARRAY:%.*]]) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[AARRAY_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-ADDRESSAUTH-NEXT: store ptr [[AARRAY]], ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP4]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @aArrayParameter( +// CHECK-BOTHAUTH-SAME: ptr [[AARRAY:%.*]]) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[AARRAY_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-BOTHAUTH-NEXT: store ptr [[AARRAY]], ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AARRAY_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.blend(i64 
[[TMP1]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = load volatile i8, ptr [[TMP5]], align 8 +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP5]] +// extern "C" const void *aArrayParameter(A aArray[]) { static_assert(same_type::value); - // CHECK-NOAUTH: [[THIS_OBJ:%.*]] = load ptr, ptr %aArray.addr - // CHECK-NOAUTH: %vtable = load ptr, ptr [[THIS_OBJ]] - // CHECK-TYPEAUTH: [[THIS_OBJ:%.*]] = load ptr, ptr %aArray.addr - // CHECK-TYPEAUTH: %vtable = load ptr, ptr [[THIS_OBJ]] - // CHECK-TYPEAUTH: [[VTABLEI:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: [[AUTHENTICATED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[VTABLEI]], i32 2, i64 48388) - // CHECK-ADDRESSAUTH: [[VTABLE_ADDR:%.*]] = load ptr, ptr %aArray.addr, align 8, !tbaa !2 - // CHECK-ADDRESSAUTH: %vtable = load ptr, ptr %0, align 8, !tbaa !7 - // CHECK-ADDRESSAUTH: [[VTABLE_ADDRI:%.*]] = ptrtoint ptr [[VTABLE_ADDR]] to i64 - // CHECK-ADDRESSAUTH: [[VTABLEI:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: [[AUTHENTICATED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[VTABLEI]], i32 2, i64 [[VTABLE_ADDRI]]) - // CHECK-BOTHAUTH: [[VTABLE_ADDR:%.*]] = load ptr, ptr %aArray.addr, align 8, !tbaa !2 - // CHECK-BOTHAUTH: %vtable = load ptr, ptr [[VTABLE_ADDR]], align 8, !tbaa !7 - // CHECK-BOTHAUTH: [[VTABLE_ADDRI:%.*]] = ptrtoint ptr [[VTABLE_ADDR]] to i64 - // CHECK-BOTHAUTH: [[VTABLE_DISC:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[VTABLE_ADDRI]], i64 48388) - // CHECK-BOTHAUTH: [[VTABLE_PTR:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[AUTHENTICATED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[VTABLE_PTR]], i32 2, i64 [[VTABLE_DISC]]) return __builtin_get_vtable_pointer(aArray); } +// CHECK-NOAUTH-LABEL: define ptr @aArrayLocal( +// CHECK-NOAUTH-SAME: ) 
#[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-NOAUTH-NEXT: [[ARRAY:%.*]] = alloca [1 x %"struct.test1::A"], align 8 +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARRAY]]) #[[ATTR5:[0-9]+]] +// CHECK-NOAUTH-NEXT: call void @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[ARRAY]]) +// CHECK-NOAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::A"], ptr [[ARRAY]], i64 0, i64 0 +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARRAY]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: ret ptr [[VTABLE]] +// +// CHECK-TYPEAUTH-LABEL: define ptr @aArrayLocal( +// CHECK-TYPEAUTH-SAME: ) #[[ATTR0]] { +// CHECK-TYPEAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-TYPEAUTH-NEXT: [[ARRAY:%.*]] = alloca [1 x %"struct.test1::A"], align 8 +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARRAY]]) #[[ATTR7:[0-9]+]] +// CHECK-TYPEAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[ARRAY]]) +// CHECK-TYPEAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::A"], ptr [[ARRAY]], i64 0, i64 0 +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP0]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = load volatile i8, ptr [[TMP2]], align 8 +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARRAY]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: ret ptr [[TMP2]] +// +// CHECK-ADDRESSAUTH-LABEL: define ptr @aArrayLocal( +// CHECK-ADDRESSAUTH-SAME: ) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-ADDRESSAUTH-NEXT: [[ARRAY:%.*]] = alloca [1 x %"struct.test1::A"], align 8 +// 
CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARRAY]]) #[[ATTR7:[0-9]+]] +// CHECK-ADDRESSAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[ARRAY]]) +// CHECK-ADDRESSAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::A"], ptr [[ARRAY]], i64 0, i64 0 +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARRAYDECAY]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 2, i64 [[TMP0]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 8 +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARRAY]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: ret ptr [[TMP3]] +// +// CHECK-BOTHAUTH-LABEL: define ptr @aArrayLocal( +// CHECK-BOTHAUTH-SAME: ) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*:]] +// CHECK-BOTHAUTH-NEXT: [[ARRAY:%.*]] = alloca [1 x %"struct.test1::A"], align 8 +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARRAY]]) #[[ATTR7:[0-9]+]] +// CHECK-BOTHAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[ARRAY]]) +// CHECK-BOTHAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::A"], ptr [[ARRAY]], i64 0, i64 0 +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARRAYDECAY]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP0]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 [[TMP1]]) +// CHECK-BOTHAUTH-NEXT: 
[[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = load volatile i8, ptr [[TMP4]], align 8 +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARRAY]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: ret ptr [[TMP4]] +// extern "C" const void *aArrayLocal() { A array[] = { A() }; static_assert(same_type::value); - // CHECK-NOAUTH: [[THIS_OBJ:%.*]] = getelementptr inbounds [1 x %"struct.test1::A"], ptr %array - // CHECK-NOAUTH: %vtable = load ptr, ptr %arraydecay - // CHECK-TYPEAUTH: %arraydecay = getelementptr inbounds [1 x %"struct.test1::A"] - // CHECK-TYPEAUTH: %vtable = load ptr, ptr %arraydecay - // CHECK-TYPEAUTH: [[VTABLEI:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-TYPEAUTH: [[AUTHENTICATED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[VTABLEI]], i32 2, i64 48388) - // CHECK-ADDRESSAUTH: %arraydecay = getelementptr inbounds [1 x %"struct.test1::A"], ptr %array, i64 0, i64 0 - // CHECK-ADDRESSAUTH: %vtable = load ptr, ptr %arraydecay, align 8, !tbaa !7 - // CHECK-ADDRESSAUTH: [[VTABLE_ADDRI:%.*]] = ptrtoint ptr %arraydecay to i64 - // CHECK-ADDRESSAUTH: [[VTABLEI:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-ADDRESSAUTH: [[AUTHENTICATED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[VTABLEI]], i32 2, i64 [[VTABLE_ADDRI]]) - // CHECK-BOTHAUTH: %arraydecay = getelementptr inbounds [1 x %"struct.test1::A"], ptr %array, i64 0, i64 0 - // CHECK-BOTHAUTH: %vtable = load ptr, ptr %arraydecay, align 8, !tbaa !7 - // CHECK-BOTHAUTH: [[VTABLE_ADDRI:%.*]] = ptrtoint ptr %arraydecay to i64 - // CHECK-BOTHAUTH: [[VTABLE_DISC:%.*]] = call i64 @llvm.ptrauth.blend(i64 %0, i64 48388) - // CHECK-BOTHAUTH: [[VTABLEI:%.*]] = ptrtoint ptr %vtable to i64 - // CHECK-BOTHAUTH: [[AUTHENTICATED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[VTABLEI]], i32 2, i64 [[VTABLE_DISC]]) return __builtin_get_vtable_pointer(array); } +// CHECK-NOAUTH-LABEL: define void @_ZN5test14testEv( +// CHECK-NOAUTH-SAME: ) #[[ATTR0]] { +// CHECK-NOAUTH-NEXT: [[ENTRY:.*]]: +// 
CHECK-NOAUTH-NEXT: [[AINSTANCE:%.*]] = alloca %"struct.test1::A", align 8 +// CHECK-NOAUTH-NEXT: [[BINSTANCE:%.*]] = alloca %"struct.test1::B", align 8 +// CHECK-NOAUTH-NEXT: [[CINSTANCE:%.*]] = alloca %"struct.test1::C", align 8 +// CHECK-NOAUTH-NEXT: [[DINSTANCE:%.*]] = alloca %"struct.test1::D", align 8 +// CHECK-NOAUTH-NEXT: [[EINSTANCE:%.*]] = alloca %"struct.test1::E", align 8 +// CHECK-NOAUTH-NEXT: [[EARRAY:%.*]] = alloca [1 x %"struct.test1::E"], align 16 +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[AINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[AINSTANCE]]) +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[BINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @_ZN5test11BC1Ev(ptr nonnull align 8 dereferenceable(8) [[BINSTANCE]]) +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[CINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @_ZN5test11CC1Ev(ptr nonnull align 8 dereferenceable(16) [[CINSTANCE]]) +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[DINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @_ZN5test11DC1Ev(ptr nonnull align 8 dereferenceable(8) [[DINSTANCE]]) +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EINSTANCE]]) +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EARRAY]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[EARRAY]], i8 0, i64 16, i1 false) +// CHECK-NOAUTH-NEXT: call void @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EARRAY]]) +// CHECK-NOAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[AINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL1:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[BINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[TMP0:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP0]], 
label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL]]: +// CHECK-NOAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-NOAUTH-NEXT: br label %[[CAST_END]] +// CHECK-NOAUTH: [[CAST_END]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-NOAUTH-NEXT: [[CALL2:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT]]) +// CHECK-NOAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[DINSTANCE]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END5:.*]], label %[[CAST_NOTNULL3:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL3]]: +// CHECK-NOAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[DINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 -32 +// CHECK-NOAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-NOAUTH-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, ptr [[DINSTANCE]], i64 [[VBASE_OFFSET]] +// CHECK-NOAUTH-NEXT: br label %[[CAST_END5]] +// CHECK-NOAUTH: [[CAST_END5]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT6:%.*]] = phi ptr [ [[ADD_PTR4]], %[[CAST_NOTNULL3]] ], [ null, %[[CAST_END]] ] +// CHECK-NOAUTH-NEXT: [[CALL7:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT6]]) +// CHECK-NOAUTH-NEXT: [[TMP2:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP2]], label %[[CAST_END13:.*]], label %[[CAST_NOTNULL8:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL8]]: +// CHECK-NOAUTH-NEXT: [[VTABLE9:%.*]] = load ptr, ptr [[EINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: [[VBASE_OFFSET_PTR10:%.*]] = getelementptr i8, ptr [[VTABLE9]], i64 -32 +// CHECK-NOAUTH-NEXT: [[VBASE_OFFSET11:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR10]], align 8 +// CHECK-NOAUTH-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 [[VBASE_OFFSET11]] +// CHECK-NOAUTH-NEXT: br label %[[CAST_END13]] +// CHECK-NOAUTH: 
[[CAST_END13]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT14:%.*]] = phi ptr [ [[ADD_PTR12]], %[[CAST_NOTNULL8]] ], [ null, %[[CAST_END5]] ] +// CHECK-NOAUTH-NEXT: [[CALL15:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT14]]) +// CHECK-NOAUTH-NEXT: [[TMP3:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP3]], label %[[CAST_END18:.*]], label %[[CAST_NOTNULL16:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL16]]: +// CHECK-NOAUTH-NEXT: [[ADD_PTR17:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-NOAUTH-NEXT: br label %[[CAST_END18]] +// CHECK-NOAUTH: [[CAST_END18]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT19:%.*]] = phi ptr [ [[ADD_PTR17]], %[[CAST_NOTNULL16]] ], [ null, %[[CAST_END13]] ] +// CHECK-NOAUTH-NEXT: [[CALL20:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT19]]) +// CHECK-NOAUTH-NEXT: [[CALL21:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[TMP4:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP4]], label %[[CAST_END24:.*]], label %[[CAST_NOTNULL22:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL22]]: +// CHECK-NOAUTH-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-NOAUTH-NEXT: br label %[[CAST_END24]] +// CHECK-NOAUTH: [[CAST_END24]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT25:%.*]] = phi ptr [ [[ADD_PTR23]], %[[CAST_NOTNULL22]] ], [ null, %[[CAST_END18]] ] +// CHECK-NOAUTH-NEXT: [[CALL26:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT25]]) +// CHECK-NOAUTH-NEXT: [[TMP5:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-NOAUTH-NEXT: br i1 [[TMP5]], label %[[CAST_END29:.*]], label %[[CAST_NOTNULL27:.*]] +// CHECK-NOAUTH: [[CAST_NOTNULL27]]: +// CHECK-NOAUTH-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-NOAUTH-NEXT: br label %[[CAST_END29]] +// CHECK-NOAUTH: [[CAST_END29]]: +// CHECK-NOAUTH-NEXT: [[CAST_RESULT30:%.*]] = phi ptr [ [[ADD_PTR28]], %[[CAST_NOTNULL27]] ], [ null, 
%[[CAST_END24]] ] +// CHECK-NOAUTH-NEXT: [[CALL31:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT30]]) +// CHECK-NOAUTH-NEXT: [[CALL32:%.*]] = call ptr @_ZN5test16b_as_AEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL33:%.*]] = call ptr @_ZN5test11cEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL34:%.*]] = call ptr @_ZN5test16c_as_ZEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL35:%.*]] = call ptr @_ZN5test16c_as_BEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL36:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL37:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL38:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL39:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL40:%.*]] = call ptr @_ZN5test11eEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL41:%.*]] = call ptr @_ZN5test16e_as_BEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[CALL42:%.*]] = call ptr @_ZN5test16e_as_DEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-NOAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::E"], ptr [[EARRAY]], i64 0, i64 0 +// CHECK-NOAUTH-NEXT: [[VTABLE43:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 16, !tbaa [[TBAA9]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EARRAY]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[DINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[CINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[BINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[AINSTANCE]]) #[[ATTR5]] +// CHECK-NOAUTH-NEXT: ret void +// +// CHECK-TYPEAUTH-LABEL: define void @_ZN5test14testEv( +// CHECK-TYPEAUTH-SAME: ) #[[ATTR0]] { +// 
CHECK-TYPEAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-TYPEAUTH-NEXT: [[AINSTANCE:%.*]] = alloca %"struct.test1::A", align 8 +// CHECK-TYPEAUTH-NEXT: [[BINSTANCE:%.*]] = alloca %"struct.test1::B", align 8 +// CHECK-TYPEAUTH-NEXT: [[CINSTANCE:%.*]] = alloca %"struct.test1::C", align 8 +// CHECK-TYPEAUTH-NEXT: [[DINSTANCE:%.*]] = alloca %"struct.test1::D", align 8 +// CHECK-TYPEAUTH-NEXT: [[EINSTANCE:%.*]] = alloca %"struct.test1::E", align 8 +// CHECK-TYPEAUTH-NEXT: [[EARRAY:%.*]] = alloca [1 x %"struct.test1::E"], align 8 +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[AINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[AINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[BINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: [[CALL1:%.*]] = call ptr @_ZN5test11BC1Ev(ptr nonnull align 8 dereferenceable(8) [[BINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[CINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: [[CALL2:%.*]] = call ptr @_ZN5test11CC1Ev(ptr nonnull align 8 dereferenceable(16) [[CINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[DINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: [[CALL3:%.*]] = call ptr @_ZN5test11DC1Ev(ptr nonnull align 8 dereferenceable(8) [[DINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: [[CALL4:%.*]] = call ptr @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EARRAY]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[EARRAY]], i8 0, i64 16, i1 false) +// CHECK-TYPEAUTH-NEXT: [[CALL5:%.*]] = call ptr @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EARRAY]]) +// CHECK-TYPEAUTH-NEXT: [[CALL6:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[AINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: 
[[CALL7:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[BINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[TMP0:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP0]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL]]: +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END]] +// CHECK-TYPEAUTH: [[CAST_END]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-TYPEAUTH-NEXT: [[CALL8:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT]]) +// CHECK-TYPEAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[DINSTANCE]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END11:.*]], label %[[CAST_NOTNULL9:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL9]]: +// CHECK-TYPEAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[DINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP2]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-TYPEAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[TMP4]], i64 -32 +// CHECK-TYPEAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[DINSTANCE]], i64 [[VBASE_OFFSET]] +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END11]] +// CHECK-TYPEAUTH: [[CAST_END11]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT12:%.*]] = phi ptr [ [[ADD_PTR10]], %[[CAST_NOTNULL9]] ], [ null, %[[CAST_END]] ] +// CHECK-TYPEAUTH-NEXT: [[CALL13:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT12]]) +// CHECK-TYPEAUTH-NEXT: [[TMP5:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP5]], label %[[CAST_END19:.*]], label %[[CAST_NOTNULL14:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL14]]: +// 
CHECK-TYPEAUTH-NEXT: [[VTABLE15:%.*]] = load ptr, ptr [[EINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[VTABLE15]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP7:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP6]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-TYPEAUTH-NEXT: [[VBASE_OFFSET_PTR16:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -32 +// CHECK-TYPEAUTH-NEXT: [[VBASE_OFFSET17:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR16]], align 8 +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 [[VBASE_OFFSET17]] +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END19]] +// CHECK-TYPEAUTH: [[CAST_END19]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT20:%.*]] = phi ptr [ [[ADD_PTR18]], %[[CAST_NOTNULL14]] ], [ null, %[[CAST_END11]] ] +// CHECK-TYPEAUTH-NEXT: [[CALL21:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT20]]) +// CHECK-TYPEAUTH-NEXT: [[TMP9:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP9]], label %[[CAST_END24:.*]], label %[[CAST_NOTNULL22:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL22]]: +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END24]] +// CHECK-TYPEAUTH: [[CAST_END24]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT25:%.*]] = phi ptr [ [[ADD_PTR23]], %[[CAST_NOTNULL22]] ], [ null, %[[CAST_END19]] ] +// CHECK-TYPEAUTH-NEXT: [[CALL26:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT25]]) +// CHECK-TYPEAUTH-NEXT: [[CALL27:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[TMP10:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP10]], label %[[CAST_END30:.*]], label %[[CAST_NOTNULL28:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL28]]: +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR29:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-TYPEAUTH-NEXT: br label 
%[[CAST_END30]] +// CHECK-TYPEAUTH: [[CAST_END30]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT31:%.*]] = phi ptr [ [[ADD_PTR29]], %[[CAST_NOTNULL28]] ], [ null, %[[CAST_END24]] ] +// CHECK-TYPEAUTH-NEXT: [[CALL32:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT31]]) +// CHECK-TYPEAUTH-NEXT: [[TMP11:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-TYPEAUTH-NEXT: br i1 [[TMP11]], label %[[CAST_END35:.*]], label %[[CAST_NOTNULL33:.*]] +// CHECK-TYPEAUTH: [[CAST_NOTNULL33]]: +// CHECK-TYPEAUTH-NEXT: [[ADD_PTR34:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-TYPEAUTH-NEXT: br label %[[CAST_END35]] +// CHECK-TYPEAUTH: [[CAST_END35]]: +// CHECK-TYPEAUTH-NEXT: [[CAST_RESULT36:%.*]] = phi ptr [ [[ADD_PTR34]], %[[CAST_NOTNULL33]] ], [ null, %[[CAST_END30]] ] +// CHECK-TYPEAUTH-NEXT: [[CALL37:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT36]]) +// CHECK-TYPEAUTH-NEXT: [[CALL38:%.*]] = call ptr @_ZN5test16b_as_AEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL39:%.*]] = call ptr @_ZN5test11cEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL40:%.*]] = call ptr @_ZN5test16c_as_ZEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL41:%.*]] = call ptr @_ZN5test16c_as_BEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL42:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL43:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL44:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL45:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL46:%.*]] = call ptr @_ZN5test11eEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL47:%.*]] = call ptr @_ZN5test16e_as_BEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[CALL48:%.*]] = call ptr @_ZN5test16e_as_DEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-TYPEAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%"struct.test1::E"], ptr [[EARRAY]], i64 0, i64 0 +// CHECK-TYPEAUTH-NEXT: [[VTABLE49:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-TYPEAUTH-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[VTABLE49]] to i64 +// CHECK-TYPEAUTH-NEXT: [[TMP13:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP12]], i32 2, i64 48388) +// CHECK-TYPEAUTH-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +// CHECK-TYPEAUTH-NEXT: [[TMP15:%.*]] = load volatile i8, ptr [[TMP14]], align 8 +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EARRAY]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[DINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[CINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[BINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[AINSTANCE]]) #[[ATTR7]] +// CHECK-TYPEAUTH-NEXT: ret void +// +// CHECK-ADDRESSAUTH-LABEL: define void @_ZN5test14testEv( +// CHECK-ADDRESSAUTH-SAME: ) #[[ATTR0]] { +// CHECK-ADDRESSAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-ADDRESSAUTH-NEXT: [[AINSTANCE:%.*]] = alloca %"struct.test1::A", align 8 +// CHECK-ADDRESSAUTH-NEXT: [[BINSTANCE:%.*]] = alloca %"struct.test1::B", align 8 +// CHECK-ADDRESSAUTH-NEXT: [[CINSTANCE:%.*]] = alloca %"struct.test1::C", align 8 +// CHECK-ADDRESSAUTH-NEXT: [[DINSTANCE:%.*]] = alloca %"struct.test1::D", align 8 +// CHECK-ADDRESSAUTH-NEXT: [[EINSTANCE:%.*]] = alloca %"struct.test1::E", align 8 +// CHECK-ADDRESSAUTH-NEXT: [[EARRAY:%.*]] = alloca [1 x %"struct.test1::E"], align 8 +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[AINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[AINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[BINSTANCE]]) #[[ATTR7]] +// 
CHECK-ADDRESSAUTH-NEXT: [[CALL1:%.*]] = call ptr @_ZN5test11BC1Ev(ptr nonnull align 8 dereferenceable(8) [[BINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[CINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: [[CALL2:%.*]] = call ptr @_ZN5test11CC1Ev(ptr nonnull align 8 dereferenceable(16) [[CINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[DINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: [[CALL3:%.*]] = call ptr @_ZN5test11DC1Ev(ptr nonnull align 8 dereferenceable(8) [[DINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: [[CALL4:%.*]] = call ptr @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EARRAY]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[EARRAY]], i8 0, i64 16, i1 false) +// CHECK-ADDRESSAUTH-NEXT: [[CALL5:%.*]] = call ptr @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EARRAY]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL6:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[AINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL7:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[BINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP0:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP0]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL]]: +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END]] +// CHECK-ADDRESSAUTH: [[CAST_END]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-ADDRESSAUTH-NEXT: [[CALL8:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[DINSTANCE]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP1]], 
label %[[CAST_END11:.*]], label %[[CAST_NOTNULL9:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL9]]: +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[DINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[DINSTANCE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[VTABLE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP4:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP3]], i32 2, i64 [[TMP2]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -32 +// CHECK-ADDRESSAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[DINSTANCE]], i64 [[VBASE_OFFSET]] +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END11]] +// CHECK-ADDRESSAUTH: [[CAST_END11]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT12:%.*]] = phi ptr [ [[ADD_PTR10]], %[[CAST_NOTNULL9]] ], [ null, %[[CAST_END]] ] +// CHECK-ADDRESSAUTH-NEXT: [[CALL13:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT12]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP6:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP6]], label %[[CAST_END19:.*]], label %[[CAST_NOTNULL14:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL14]]: +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE15:%.*]] = load ptr, ptr [[EINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[EINSTANCE]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[VTABLE15]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP9:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP8]], i32 2, i64 [[TMP7]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[VBASE_OFFSET_PTR16:%.*]] = getelementptr i8, ptr [[TMP10]], i64 -32 +// CHECK-ADDRESSAUTH-NEXT: [[VBASE_OFFSET17:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR16]], align 8 +// 
CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 [[VBASE_OFFSET17]] +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END19]] +// CHECK-ADDRESSAUTH: [[CAST_END19]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT20:%.*]] = phi ptr [ [[ADD_PTR18]], %[[CAST_NOTNULL14]] ], [ null, %[[CAST_END11]] ] +// CHECK-ADDRESSAUTH-NEXT: [[CALL21:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT20]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP11:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP11]], label %[[CAST_END24:.*]], label %[[CAST_NOTNULL22:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL22]]: +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END24]] +// CHECK-ADDRESSAUTH: [[CAST_END24]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT25:%.*]] = phi ptr [ [[ADD_PTR23]], %[[CAST_NOTNULL22]] ], [ null, %[[CAST_END19]] ] +// CHECK-ADDRESSAUTH-NEXT: [[CALL26:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT25]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL27:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP12:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP12]], label %[[CAST_END30:.*]], label %[[CAST_NOTNULL28:.*]] +// CHECK-ADDRESSAUTH: [[CAST_NOTNULL28]]: +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR29:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END30]] +// CHECK-ADDRESSAUTH: [[CAST_END30]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT31:%.*]] = phi ptr [ [[ADD_PTR29]], %[[CAST_NOTNULL28]] ], [ null, %[[CAST_END24]] ] +// CHECK-ADDRESSAUTH-NEXT: [[CALL32:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT31]]) +// CHECK-ADDRESSAUTH-NEXT: [[TMP13:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-ADDRESSAUTH-NEXT: br i1 [[TMP13]], label %[[CAST_END35:.*]], label %[[CAST_NOTNULL33:.*]] +// CHECK-ADDRESSAUTH: 
[[CAST_NOTNULL33]]: +// CHECK-ADDRESSAUTH-NEXT: [[ADD_PTR34:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-ADDRESSAUTH-NEXT: br label %[[CAST_END35]] +// CHECK-ADDRESSAUTH: [[CAST_END35]]: +// CHECK-ADDRESSAUTH-NEXT: [[CAST_RESULT36:%.*]] = phi ptr [ [[ADD_PTR34]], %[[CAST_NOTNULL33]] ], [ null, %[[CAST_END30]] ] +// CHECK-ADDRESSAUTH-NEXT: [[CALL37:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT36]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL38:%.*]] = call ptr @_ZN5test16b_as_AEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL39:%.*]] = call ptr @_ZN5test11cEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL40:%.*]] = call ptr @_ZN5test16c_as_ZEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL41:%.*]] = call ptr @_ZN5test16c_as_BEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL42:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL43:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL44:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL45:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL46:%.*]] = call ptr @_ZN5test11eEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL47:%.*]] = call ptr @_ZN5test16e_as_BEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[CALL48:%.*]] = call ptr @_ZN5test16e_as_DEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-ADDRESSAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::E"], ptr [[EARRAY]], i64 0, i64 0 +// CHECK-ADDRESSAUTH-NEXT: [[VTABLE49:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-ADDRESSAUTH-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYDECAY]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[VTABLE49]] to i64 +// CHECK-ADDRESSAUTH-NEXT: [[TMP16:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP15]], i32 2, i64 [[TMP14]]) 
+// CHECK-ADDRESSAUTH-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +// CHECK-ADDRESSAUTH-NEXT: [[TMP18:%.*]] = load volatile i8, ptr [[TMP17]], align 8 +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EARRAY]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[DINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[CINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[BINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[AINSTANCE]]) #[[ATTR7]] +// CHECK-ADDRESSAUTH-NEXT: ret void +// +// CHECK-BOTHAUTH-LABEL: define void @_ZN5test14testEv( +// CHECK-BOTHAUTH-SAME: ) #[[ATTR0]] { +// CHECK-BOTHAUTH-NEXT: [[ENTRY:.*]]: +// CHECK-BOTHAUTH-NEXT: [[AINSTANCE:%.*]] = alloca %"struct.test1::A", align 8 +// CHECK-BOTHAUTH-NEXT: [[BINSTANCE:%.*]] = alloca %"struct.test1::B", align 8 +// CHECK-BOTHAUTH-NEXT: [[CINSTANCE:%.*]] = alloca %"struct.test1::C", align 8 +// CHECK-BOTHAUTH-NEXT: [[DINSTANCE:%.*]] = alloca %"struct.test1::D", align 8 +// CHECK-BOTHAUTH-NEXT: [[EINSTANCE:%.*]] = alloca %"struct.test1::E", align 8 +// CHECK-BOTHAUTH-NEXT: [[EARRAY:%.*]] = alloca [1 x %"struct.test1::E"], align 8 +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[AINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: [[CALL:%.*]] = call ptr @_ZN5test11AC1Ev(ptr nonnull align 8 dereferenceable(8) [[AINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[BINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: [[CALL1:%.*]] = call ptr @_ZN5test11BC1Ev(ptr nonnull align 8 dereferenceable(8) [[BINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[CINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: [[CALL2:%.*]] = call ptr @_ZN5test11CC1Ev(ptr nonnull align 8 dereferenceable(16) [[CINSTANCE]]) +// 
CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[DINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: [[CALL3:%.*]] = call ptr @_ZN5test11DC1Ev(ptr nonnull align 8 dereferenceable(8) [[DINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: [[CALL4:%.*]] = call ptr @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.start.p0(ptr [[EARRAY]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[EARRAY]], i8 0, i64 16, i1 false) +// CHECK-BOTHAUTH-NEXT: [[CALL5:%.*]] = call ptr @_ZN5test11EC1Ev(ptr nonnull align 8 dereferenceable(16) [[EARRAY]]) +// CHECK-BOTHAUTH-NEXT: [[CALL6:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[AINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL7:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[BINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[TMP0:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP0]], label %[[CAST_END:.*]], label %[[CAST_NOTNULL:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL]]: +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END]] +// CHECK-BOTHAUTH: [[CAST_END]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT:%.*]] = phi ptr [ [[ADD_PTR]], %[[CAST_NOTNULL]] ], [ null, %[[ENTRY]] ] +// CHECK-BOTHAUTH-NEXT: [[CALL8:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT]]) +// CHECK-BOTHAUTH-NEXT: [[TMP1:%.*]] = icmp eq ptr [[DINSTANCE]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP1]], label %[[CAST_END11:.*]], label %[[CAST_NOTNULL9:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL9]]: +// CHECK-BOTHAUTH-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[DINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[DINSTANCE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP3:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP2]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP4:%.*]] = 
ptrtoint ptr [[VTABLE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP5:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP4]], i32 2, i64 [[TMP3]]) +// CHECK-BOTHAUTH-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +// CHECK-BOTHAUTH-NEXT: [[VBASE_OFFSET_PTR:%.*]] = getelementptr i8, ptr [[TMP6]], i64 -32 +// CHECK-BOTHAUTH-NEXT: [[VBASE_OFFSET:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR]], align 8 +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[DINSTANCE]], i64 [[VBASE_OFFSET]] +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END11]] +// CHECK-BOTHAUTH: [[CAST_END11]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT12:%.*]] = phi ptr [ [[ADD_PTR10]], %[[CAST_NOTNULL9]] ], [ null, %[[CAST_END]] ] +// CHECK-BOTHAUTH-NEXT: [[CALL13:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT12]]) +// CHECK-BOTHAUTH-NEXT: [[TMP7:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP7]], label %[[CAST_END19:.*]], label %[[CAST_NOTNULL14:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL14]]: +// CHECK-BOTHAUTH-NEXT: [[VTABLE15:%.*]] = load ptr, ptr [[EINSTANCE]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[EINSTANCE]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP9:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP8]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VTABLE15]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP11:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP10]], i32 2, i64 [[TMP9]]) +// CHECK-BOTHAUTH-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK-BOTHAUTH-NEXT: [[VBASE_OFFSET_PTR16:%.*]] = getelementptr i8, ptr [[TMP12]], i64 -32 +// CHECK-BOTHAUTH-NEXT: [[VBASE_OFFSET17:%.*]] = load i64, ptr [[VBASE_OFFSET_PTR16]], align 8 +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 [[VBASE_OFFSET17]] +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END19]] +// CHECK-BOTHAUTH: [[CAST_END19]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT20:%.*]] = phi ptr [ [[ADD_PTR18]], 
%[[CAST_NOTNULL14]] ], [ null, %[[CAST_END11]] ] +// CHECK-BOTHAUTH-NEXT: [[CALL21:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT20]]) +// CHECK-BOTHAUTH-NEXT: [[TMP13:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP13]], label %[[CAST_END24:.*]], label %[[CAST_NOTNULL22:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL22]]: +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END24]] +// CHECK-BOTHAUTH: [[CAST_END24]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT25:%.*]] = phi ptr [ [[ADD_PTR23]], %[[CAST_NOTNULL22]] ], [ null, %[[CAST_END19]] ] +// CHECK-BOTHAUTH-NEXT: [[CALL26:%.*]] = call ptr @_ZN5test11aEPNS_1AE(ptr [[CAST_RESULT25]]) +// CHECK-BOTHAUTH-NEXT: [[CALL27:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[TMP14:%.*]] = icmp eq ptr [[CINSTANCE]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP14]], label %[[CAST_END30:.*]], label %[[CAST_NOTNULL28:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL28]]: +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR29:%.*]] = getelementptr inbounds i8, ptr [[CINSTANCE]], i64 8 +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END30]] +// CHECK-BOTHAUTH: [[CAST_END30]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT31:%.*]] = phi ptr [ [[ADD_PTR29]], %[[CAST_NOTNULL28]] ], [ null, %[[CAST_END24]] ] +// CHECK-BOTHAUTH-NEXT: [[CALL32:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT31]]) +// CHECK-BOTHAUTH-NEXT: [[TMP15:%.*]] = icmp eq ptr [[EINSTANCE]], null +// CHECK-BOTHAUTH-NEXT: br i1 [[TMP15]], label %[[CAST_END35:.*]], label %[[CAST_NOTNULL33:.*]] +// CHECK-BOTHAUTH: [[CAST_NOTNULL33]]: +// CHECK-BOTHAUTH-NEXT: [[ADD_PTR34:%.*]] = getelementptr inbounds i8, ptr [[EINSTANCE]], i64 8 +// CHECK-BOTHAUTH-NEXT: br label %[[CAST_END35]] +// CHECK-BOTHAUTH: [[CAST_END35]]: +// CHECK-BOTHAUTH-NEXT: [[CAST_RESULT36:%.*]] = phi ptr [ [[ADD_PTR34]], %[[CAST_NOTNULL33]] ], [ null, %[[CAST_END30]] ] +// CHECK-BOTHAUTH-NEXT: 
[[CALL37:%.*]] = call ptr @_ZN5test11bEPNS_1BE(ptr [[CAST_RESULT36]]) +// CHECK-BOTHAUTH-NEXT: [[CALL38:%.*]] = call ptr @_ZN5test16b_as_AEPNS_1BE(ptr [[BINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL39:%.*]] = call ptr @_ZN5test11cEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL40:%.*]] = call ptr @_ZN5test16c_as_ZEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL41:%.*]] = call ptr @_ZN5test16c_as_BEPNS_1CE(ptr [[CINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL42:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL43:%.*]] = call ptr @_ZN5test11dEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL44:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[DINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL45:%.*]] = call ptr @_ZN5test16d_as_AEPNS_1DE(ptr [[EINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL46:%.*]] = call ptr @_ZN5test11eEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL47:%.*]] = call ptr @_ZN5test16e_as_BEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[CALL48:%.*]] = call ptr @_ZN5test16e_as_DEPNS_1EE(ptr [[EINSTANCE]]) +// CHECK-BOTHAUTH-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %"struct.test1::E"], ptr [[EARRAY]], i64 0, i64 0 +// CHECK-BOTHAUTH-NEXT: [[VTABLE49:%.*]] = load ptr, ptr [[ARRAYDECAY]], align 8, !tbaa [[TBAA9]] +// CHECK-BOTHAUTH-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYDECAY]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP17:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[TMP16]], i64 48388) +// CHECK-BOTHAUTH-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[VTABLE49]] to i64 +// CHECK-BOTHAUTH-NEXT: [[TMP19:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP18]], i32 2, i64 [[TMP17]]) +// CHECK-BOTHAUTH-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK-BOTHAUTH-NEXT: [[TMP21:%.*]] = load volatile i8, ptr [[TMP20]], align 8 +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EARRAY]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[EINSTANCE]]) 
#[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[DINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[CINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[BINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: call void @llvm.lifetime.end.p0(ptr [[AINSTANCE]]) #[[ATTR7]] +// CHECK-BOTHAUTH-NEXT: ret void +// void test() { A aInstance; B bInstance; @@ -368,3 +1350,68 @@ void test() { (void)__builtin_get_vtable_pointer(eArray); } } // namespace test1 +//. +// CHECK-NOAUTH: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-NOAUTH: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-NOAUTH: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-NOAUTH: [[META7]] = !{!"p1 _ZTSN5test11AE", [[META8:![0-9]+]], i64 0} +// CHECK-NOAUTH: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK-NOAUTH: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK-NOAUTH: [[META10]] = !{!"vtable pointer", [[META5]], i64 0} +// CHECK-NOAUTH: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK-NOAUTH: [[META12]] = !{!"p1 _ZTSN5test11BE", [[META8]], i64 0} +// CHECK-NOAUTH: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// CHECK-NOAUTH: [[META14]] = !{!"p1 _ZTSN5test11CE", [[META8]], i64 0} +// CHECK-NOAUTH: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK-NOAUTH: [[META16]] = !{!"p1 _ZTSN5test11DE", [[META8]], i64 0} +// CHECK-NOAUTH: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK-NOAUTH: [[META18]] = !{!"p1 _ZTSN5test11EE", [[META8]], i64 0} +//. 
+// CHECK-TYPEAUTH: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-TYPEAUTH: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-TYPEAUTH: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-TYPEAUTH: [[META7]] = !{!"p1 _ZTSN5test11AE", [[META8:![0-9]+]], i64 0} +// CHECK-TYPEAUTH: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK-TYPEAUTH: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK-TYPEAUTH: [[META10]] = !{!"vtable pointer", [[META5]], i64 0} +// CHECK-TYPEAUTH: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK-TYPEAUTH: [[META12]] = !{!"p1 _ZTSN5test11BE", [[META8]], i64 0} +// CHECK-TYPEAUTH: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// CHECK-TYPEAUTH: [[META14]] = !{!"p1 _ZTSN5test11CE", [[META8]], i64 0} +// CHECK-TYPEAUTH: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK-TYPEAUTH: [[META16]] = !{!"p1 _ZTSN5test11DE", [[META8]], i64 0} +// CHECK-TYPEAUTH: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK-TYPEAUTH: [[META18]] = !{!"p1 _ZTSN5test11EE", [[META8]], i64 0} +//. 
+// CHECK-ADDRESSAUTH: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-ADDRESSAUTH: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-ADDRESSAUTH: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-ADDRESSAUTH: [[META7]] = !{!"p1 _ZTSN5test11AE", [[META8:![0-9]+]], i64 0} +// CHECK-ADDRESSAUTH: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK-ADDRESSAUTH: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK-ADDRESSAUTH: [[META10]] = !{!"vtable pointer", [[META5]], i64 0} +// CHECK-ADDRESSAUTH: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK-ADDRESSAUTH: [[META12]] = !{!"p1 _ZTSN5test11BE", [[META8]], i64 0} +// CHECK-ADDRESSAUTH: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// CHECK-ADDRESSAUTH: [[META14]] = !{!"p1 _ZTSN5test11CE", [[META8]], i64 0} +// CHECK-ADDRESSAUTH: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK-ADDRESSAUTH: [[META16]] = !{!"p1 _ZTSN5test11DE", [[META8]], i64 0} +// CHECK-ADDRESSAUTH: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK-ADDRESSAUTH: [[META18]] = !{!"p1 _ZTSN5test11EE", [[META8]], i64 0} +//. 
+// CHECK-BOTHAUTH: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-BOTHAUTH: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK-BOTHAUTH: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-BOTHAUTH: [[META7]] = !{!"p1 _ZTSN5test11AE", [[META8:![0-9]+]], i64 0} +// CHECK-BOTHAUTH: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK-BOTHAUTH: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK-BOTHAUTH: [[META10]] = !{!"vtable pointer", [[META5]], i64 0} +// CHECK-BOTHAUTH: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK-BOTHAUTH: [[META12]] = !{!"p1 _ZTSN5test11BE", [[META8]], i64 0} +// CHECK-BOTHAUTH: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// CHECK-BOTHAUTH: [[META14]] = !{!"p1 _ZTSN5test11CE", [[META8]], i64 0} +// CHECK-BOTHAUTH: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK-BOTHAUTH: [[META16]] = !{!"p1 _ZTSN5test11DE", [[META8]], i64 0} +// CHECK-BOTHAUTH: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK-BOTHAUTH: [[META18]] = !{!"p1 _ZTSN5test11EE", [[META8]], i64 0} +//. 
diff --git a/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp b/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp index d4b4f3030d117..5c3bd17ab909c 100644 --- a/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp +++ b/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp @@ -29,43 +29,43 @@ void f(S *s, void (S::*p)()) { // NO-MERGE-NEXT: [[MEMPTR_ISVIRTUAL_NOT:%.*]] = icmp eq i64 [[TMP1]], 0 // NO-MERGE-NEXT: br i1 [[MEMPTR_ISVIRTUAL_NOT]], label %[[MEMPTR_NONVIRTUAL:.*]], label %[[MEMPTR_VIRTUAL:.*]] // NO-MERGE: [[MEMPTR_VIRTUAL]]: -// NO-MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2:![0-9]+]] +// NO-MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA6:![0-9]+]] // NO-MERGE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 [[P_COERCE0]] // NO-MERGE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -1 -// NO-MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5:![0-9]+]] -// NO-MERGE-NEXT: br i1 [[TMP4]], label %[[MEMPTR_VIRTUAL7:.*]], label %[[TRAP:.*]], !prof [[PROF6:![0-9]+]], !nosanitize [[META5]] +// NO-MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META8:![0-9]+]] +// NO-MERGE-NEXT: br i1 [[TMP4]], label %[[MEMPTR_VIRTUAL7:.*]], label %[[TRAP:.*]], !prof [[PROF9:![0-9]+]], !nosanitize [[META8]] // NO-MERGE: [[TRAP]]: -// NO-MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize [[META5]] -// NO-MERGE-NEXT: unreachable, !nosanitize [[META5]] +// NO-MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize [[META8]] +// NO-MERGE-NEXT: unreachable, !nosanitize [[META8]] // NO-MERGE: [[MEMPTR_NONVIRTUAL]]: // NO-MERGE-NEXT: [[MEMPTR_NONVIRTUALFN:%.*]] = inttoptr i64 [[P_COERCE0]] to ptr -// NO-MERGE-NEXT: [[TMP5:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B1FvvE") 
-// NO-MERGE-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B2FvvE") -// NO-MERGE-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]], !nosanitize [[META5]] -// NO-MERGE-NEXT: br i1 [[TMP7]], label %[[MEMPTR_NONVIRTUAL23:.*]], label %[[TRAP2:.*]], !prof [[PROF6]], !nosanitize [[META5]] +// NO-MERGE-NEXT: [[TMP5:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B1FvvE"), !nosanitize [[META8]] +// NO-MERGE-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B2FvvE"), !nosanitize [[META8]] +// NO-MERGE-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]], !nosanitize [[META8]] +// NO-MERGE-NEXT: br i1 [[TMP7]], label %[[MEMPTR_NONVIRTUAL23:.*]], label %[[TRAP2:.*]], !prof [[PROF9]], !nosanitize [[META8]] // NO-MERGE: [[TRAP2]]: -// NO-MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR4:[0-9]+]], !nosanitize [[META5]] -// NO-MERGE-NEXT: unreachable, !nosanitize [[META5]] +// NO-MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR4:[0-9]+]], !nosanitize [[META8]] +// NO-MERGE-NEXT: unreachable, !nosanitize [[META8]] // NO-MERGE: [[MEMPTR_VIRTUAL7]]: -// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META5]] +// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META8]] // NO-MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5:[0-9]+]] -// NO-MERGE-NEXT: [[VTABLE8:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] +// NO-MERGE-NEXT: [[VTABLE8:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA6]] // NO-MERGE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[VTABLE8]], i64 [[P_COERCE0]] // NO-MERGE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1 -// NO-MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize 
[[META5]] -// NO-MERGE-NEXT: br i1 [[TMP10]], label %[[MEMPTR_VIRTUAL19:.*]], label %[[TRAP2]], !prof [[PROF6]], !nosanitize [[META5]] +// NO-MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META8]] +// NO-MERGE-NEXT: br i1 [[TMP10]], label %[[MEMPTR_VIRTUAL19:.*]], label %[[TRAP2]], !prof [[PROF9]], !nosanitize [[META8]] // NO-MERGE: [[TRAP13:.*]]: -// NO-MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR4]], !nosanitize [[META5]] -// NO-MERGE-NEXT: unreachable, !nosanitize [[META5]] +// NO-MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR4]], !nosanitize [[META8]] +// NO-MERGE-NEXT: unreachable, !nosanitize [[META8]] // NO-MERGE: [[MEMPTR_VIRTUAL19]]: -// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN9:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META5]] +// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN9:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META8]] // NO-MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN9]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5]] -// NO-MERGE-NEXT: [[VTABLE20:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] +// NO-MERGE-NEXT: [[VTABLE20:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA6]] // NO-MERGE-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[VTABLE20]], i64 [[P_COERCE0]] // NO-MERGE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 -1 -// NO-MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] -// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN21:%.*]] = load ptr, ptr [[TMP12]], align 8, !nosanitize [[META5]] -// NO-MERGE-NEXT: br i1 [[TMP13]], label %[[MEMPTR_END27:.*]], label %[[TRAP13]], !prof [[PROF6]], !nosanitize [[META5]] +// NO-MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META8]] +// NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN21:%.*]] = load ptr, ptr 
[[TMP12]], align 8, !nosanitize [[META8]] +// NO-MERGE-NEXT: br i1 [[TMP13]], label %[[MEMPTR_END27:.*]], label %[[TRAP13]], !prof [[PROF9]], !nosanitize [[META8]] // NO-MERGE: [[MEMPTR_NONVIRTUAL23]]: // NO-MERGE-NEXT: tail call void [[MEMPTR_NONVIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5]] // NO-MERGE-NEXT: tail call void [[MEMPTR_NONVIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5]] @@ -84,37 +84,37 @@ void f(S *s, void (S::*p)()) { // MERGE-NEXT: [[MEMPTR_ISVIRTUAL_NOT:%.*]] = icmp eq i64 [[TMP1]], 0 // MERGE-NEXT: br i1 [[MEMPTR_ISVIRTUAL_NOT]], label %[[MEMPTR_NONVIRTUAL:.*]], label %[[MEMPTR_VIRTUAL:.*]] // MERGE: [[MEMPTR_VIRTUAL]]: -// MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2:![0-9]+]] +// MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA6:![0-9]+]] // MERGE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 [[P_COERCE0]] // MERGE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -1 -// MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5:![0-9]+]] -// MERGE-NEXT: br i1 [[TMP4]], label %[[MEMPTR_VIRTUAL6:.*]], label %[[TRAP:.*]], !prof [[PROF6:![0-9]+]], !nosanitize [[META5]] +// MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META8:![0-9]+]] +// MERGE-NEXT: br i1 [[TMP4]], label %[[MEMPTR_VIRTUAL6:.*]], label %[[TRAP:.*]], !prof [[PROF9:![0-9]+]], !nosanitize [[META8]] // MERGE: [[TRAP]]: -// MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize [[META5]] -// MERGE-NEXT: unreachable, !nosanitize [[META5]] +// MERGE-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize [[META8]] +// MERGE-NEXT: unreachable, !nosanitize [[META8]] // MERGE: [[MEMPTR_NONVIRTUAL]]: // MERGE-NEXT: [[MEMPTR_NONVIRTUALFN:%.*]] = 
inttoptr i64 [[P_COERCE0]] to ptr -// MERGE-NEXT: [[TMP5:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B1FvvE") -// MERGE-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B2FvvE") -// MERGE-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]], !nosanitize [[META5]] -// MERGE-NEXT: br i1 [[TMP7]], label %[[MEMPTR_NONVIRTUAL21:.*]], label %[[TRAP]], !prof [[PROF6]], !nosanitize [[META5]] +// MERGE-NEXT: [[TMP5:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B1FvvE"), !nosanitize [[META8]] +// MERGE-NEXT: [[TMP6:%.*]] = tail call i1 @llvm.type.test(ptr [[MEMPTR_NONVIRTUALFN]], metadata !"_ZTSM2B2FvvE"), !nosanitize [[META8]] +// MERGE-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]], !nosanitize [[META8]] +// MERGE-NEXT: br i1 [[TMP7]], label %[[MEMPTR_NONVIRTUAL21:.*]], label %[[TRAP]], !prof [[PROF9]], !nosanitize [[META8]] // MERGE: [[MEMPTR_VIRTUAL6]]: -// MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META5]] +// MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META8]] // MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4:[0-9]+]] -// MERGE-NEXT: [[VTABLE7:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] +// MERGE-NEXT: [[VTABLE7:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA6]] // MERGE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[VTABLE7]], i64 [[P_COERCE0]] // MERGE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1 -// MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] -// MERGE-NEXT: br i1 [[TMP10]], label %[[MEMPTR_VIRTUAL17:.*]], label %[[TRAP]], !prof [[PROF6]], !nosanitize [[META5]] +// MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata 
!"_ZTSM1SFvvE.virtual"), !nosanitize [[META8]] +// MERGE-NEXT: br i1 [[TMP10]], label %[[MEMPTR_VIRTUAL17:.*]], label %[[TRAP]], !prof [[PROF9]], !nosanitize [[META8]] // MERGE: [[MEMPTR_VIRTUAL17]]: -// MERGE-NEXT: [[MEMPTR_VIRTUALFN8:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META5]] +// MERGE-NEXT: [[MEMPTR_VIRTUALFN8:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META8]] // MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN8]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]] -// MERGE-NEXT: [[VTABLE18:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] +// MERGE-NEXT: [[VTABLE18:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA6]] // MERGE-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[VTABLE18]], i64 [[P_COERCE0]] // MERGE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 -1 -// MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] -// MERGE-NEXT: [[MEMPTR_VIRTUALFN19:%.*]] = load ptr, ptr [[TMP12]], align 8, !nosanitize [[META5]] -// MERGE-NEXT: br i1 [[TMP13]], label %[[MEMPTR_END24:.*]], label %[[TRAP]], !prof [[PROF6]], !nosanitize [[META5]] +// MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META8]] +// MERGE-NEXT: [[MEMPTR_VIRTUALFN19:%.*]] = load ptr, ptr [[TMP12]], align 8, !nosanitize [[META8]] +// MERGE-NEXT: br i1 [[TMP13]], label %[[MEMPTR_END24:.*]], label %[[TRAP]], !prof [[PROF9]], !nosanitize [[META8]] // MERGE: [[MEMPTR_NONVIRTUAL21]]: // MERGE-NEXT: tail call void [[MEMPTR_NONVIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]] // MERGE-NEXT: tail call void [[MEMPTR_NONVIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]] @@ -125,15 +125,15 @@ void f(S *s, void (S::*p)()) { // MERGE-NEXT: ret void // //. 
-// NO-MERGE: [[VTABLE_POINTER_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// NO-MERGE: [[META3]] = !{!"vtable pointer", [[META4:![0-9]+]], i64 0} -// NO-MERGE: [[META4]] = !{!"Simple C++ TBAA"} -// NO-MERGE: [[META5]] = !{} -// NO-MERGE: [[PROF6]] = !{!"branch_weights", i32 1048575, i32 1} +// NO-MERGE: [[META5:![0-9]+]] = !{!"Simple C++ TBAA"} +// NO-MERGE: [[VTABLE_POINTER_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// NO-MERGE: [[META7]] = !{!"vtable pointer", [[META5]], i64 0} +// NO-MERGE: [[META8]] = !{} +// NO-MERGE: [[PROF9]] = !{!"branch_weights", i32 1048575, i32 1} //. -// MERGE: [[VTABLE_POINTER_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// MERGE: [[META3]] = !{!"vtable pointer", [[META4:![0-9]+]], i64 0} -// MERGE: [[META4]] = !{!"Simple C++ TBAA"} -// MERGE: [[META5]] = !{} -// MERGE: [[PROF6]] = !{!"branch_weights", i32 1048575, i32 1} +// MERGE: [[META5:![0-9]+]] = !{!"Simple C++ TBAA"} +// MERGE: [[VTABLE_POINTER_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// MERGE: [[META7]] = !{!"vtable pointer", [[META5]], i64 0} +// MERGE: [[META8]] = !{} +// MERGE: [[PROF9]] = !{!"branch_weights", i32 1048575, i32 1} //. 
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp index ab3695a3d9ce3..30941653c150d 100644 --- a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp +++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp @@ -110,8 +110,8 @@ int first_i32_ulong2(int x, ulong2 *y) { return first(x, *y); } // CHECK-LABEL: define void @second_i32_ulong2( // CHECK-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[INT_TBAA2:![0-9]+]] -// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[DOUBLE_TBAA6:![0-9]+]] +// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[DOUBLE_TBAA6]] // CHECK-NEXT: ret void // void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) { @@ -121,8 +121,8 @@ void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) { // CHECK-LABEL: define void @first_ulong2_i32( // CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[INT_TBAA2]] -// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[DOUBLE_TBAA6]] +// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[DOUBLE_TBAA6]] // CHECK-NEXT: ret void // void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) { @@ -180,7 +180,7 @@ void first_asc_i32(asc *x, int y, asc *r) { *r = first(*x, y); } int second_asc_i32(asc *x, int y) { return second(*x, y); } } //. 
-// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[DOUBLE_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/CodeGenCXX/load-reference-metadata.cpp b/clang/test/CodeGenCXX/load-reference-metadata.cpp index abfdd055c3ad6..b9777f4e0efd2 100644 --- a/clang/test/CodeGenCXX/load-reference-metadata.cpp +++ b/clang/test/CodeGenCXX/load-reference-metadata.cpp @@ -13,18 +13,18 @@ struct S { // CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA6]], !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]] // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !tbaa [[CHARPTR_TBAA9:![0-9]+]], !nonnull [[META7]] -// CHECK-NEXT: store i8 0, ptr [[TMP1]], align 1, !tbaa [[CHAR_TBAA14:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !tbaa [[CHARPTR_TBAA11:![0-9]+]], !nonnull [[META9]] +// CHECK-NEXT: store i8 0, ptr [[TMP1]], align 1, !tbaa [[CHAR_TBAA16:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], 
align 8, !tbaa [[_ZTS1SPTR_TBAA6]], !nonnull [[META9]], !align [[META10]] // CHECK-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP2]], i32 0, i32 1 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B]], align 8, !tbaa [[INTPTR_TBAA15:![0-9]+]], !nonnull [[META7]], !align [[META16:![0-9]+]] -// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4, !tbaa [[INT_TBAA17:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B]], align 8, !tbaa [[INTPTR_TBAA17:![0-9]+]], !nonnull [[META9]], !align [[META18:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA6]], !nonnull [[META9]], !align [[META10]] // CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i32 0, i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8, !tbaa [[_ZTS1FPTR_TBAA19:![0-9]+]], !nonnull [[META7]], !align [[META20:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8, !tbaa [[_ZTS1FPTR_TBAA19:![0-9]+]], !nonnull [[META9]], !align [[META20:![0-9]+]] // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_F:%.*]], ptr [[TMP5]], i32 0, i32 0 // CHECK-NEXT: store i32 0, ptr [[X]], align 32, !tbaa [[INT_TBAA21:![0-9]+]] // CHECK-NEXT: ret void @@ -42,10 +42,10 @@ extern B (&bb)[2]; // CHECK-LABEL: define dso_local void @_Z13test_externalv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @b, align 8, !tbaa [[_ZTS1BPTR_TBAA23:![0-9]+]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @b, align 8, !tbaa [[_ZTS1BPTR_TBAA23:![0-9]+]], !nonnull [[META9]], !align [[META10]] // CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_B:%.*]], ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: store i8 0, 
ptr [[C]], align 8, !tbaa [[CHAR_TBAA25:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @bb, align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META7]], !align [[META20]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @bb, align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META9]], !align [[META20]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x %struct.B], ptr [[TMP1]], i64 0, i64 0 // CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[ARRAYIDX]], i32 0, i32 2 // CHECK-NEXT: store i8 0, ptr [[C1]], align 16, !tbaa [[CHAR_TBAA25]] @@ -61,7 +61,7 @@ void test_external() { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META9]], !align [[META10]] // CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_B:%.*]], ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: ret ptr [[C]] // @@ -69,30 +69,30 @@ char* test_deref_only(B &s) { return &s.c; } //. 
-// CHECK: [[_ZTS1SPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"p1 _ZTS1S", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK: [[META6]] = !{!"Simple C++ TBAA"} -// CHECK: [[META7]] = !{} -// CHECK: [[META8]] = !{i64 8} -// CHECK: [[CHARPTR_TBAA9]] = !{[[META10:![0-9]+]], [[META11:![0-9]+]], i64 0} -// CHECK: [[META10]] = !{!"_ZTS1S", [[META11]], i64 0, [[META12:![0-9]+]], i64 8, [[META13:![0-9]+]], i64 16} -// CHECK: [[META11]] = !{!"p1 omnipotent char", [[META4]], i64 0} -// CHECK: [[META12]] = !{!"p1 int", [[META4]], i64 0} -// CHECK: [[META13]] = !{!"p1 _ZTS1F", [[META4]], i64 0} -// CHECK: [[CHAR_TBAA14]] = !{[[META5]], [[META5]], i64 0} -// CHECK: [[INTPTR_TBAA15]] = !{[[META10]], [[META12]], i64 8} -// CHECK: [[META16]] = !{i64 4} -// CHECK: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// CHECK: [[META18]] = !{!"int", [[META5]], i64 0} -// CHECK: [[_ZTS1FPTR_TBAA19]] = !{[[META10]], [[META13]], i64 16} +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[_ZTS1SPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"p1 _ZTS1S", [[META8:![0-9]+]], i64 0} +// CHECK: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK: [[META9]] = !{} +// CHECK: [[META10]] = !{i64 8} +// CHECK: [[CHARPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"_ZTS1S", [[META13]], i64 0, [[META14:![0-9]+]], i64 8, [[META15:![0-9]+]], i64 16} +// CHECK: [[META13]] = !{!"p1 omnipotent char", [[META8]], i64 0} +// CHECK: [[META14]] = !{!"p1 int", [[META8]], i64 0} +// CHECK: [[META15]] = !{!"p1 _ZTS1F", [[META8]], i64 0} +// CHECK: [[CHAR_TBAA16]] = 
!{[[META4]], [[META4]], i64 0} +// CHECK: [[INTPTR_TBAA17]] = !{[[META12]], [[META14]], i64 8} +// CHECK: [[META18]] = !{i64 4} +// CHECK: [[_ZTS1FPTR_TBAA19]] = !{[[META12]], [[META15]], i64 16} // CHECK: [[META20]] = !{i64 32} -// CHECK: [[INT_TBAA21]] = !{[[META22:![0-9]+]], [[META18]], i64 0} -// CHECK: [[META22]] = !{!"_ZTS1F", [[META18]], i64 0} +// CHECK: [[INT_TBAA21]] = !{[[META22:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META22]] = !{!"_ZTS1F", [[META3]], i64 0} // CHECK: [[_ZTS1BPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} -// CHECK: [[META24]] = !{!"p1 _ZTS1B", [[META4]], i64 0} -// CHECK: [[CHAR_TBAA25]] = !{[[META26:![0-9]+]], [[META5]], i64 16} -// CHECK: [[META26]] = !{!"_ZTS1B", [[META27:![0-9]+]], i64 8, [[META5]], i64 16} -// CHECK: [[META27]] = !{!"long long", [[META5]], i64 0} +// CHECK: [[META24]] = !{!"p1 _ZTS1B", [[META8]], i64 0} +// CHECK: [[CHAR_TBAA25]] = !{[[META26:![0-9]+]], [[META4]], i64 16} +// CHECK: [[META26]] = !{!"_ZTS1B", [[META27:![0-9]+]], i64 8, [[META4]], i64 16} +// CHECK: [[META27]] = !{!"long long", [[META4]], i64 0} //. diff --git a/clang/test/CodeGenCXX/std-byte.cpp b/clang/test/CodeGenCXX/std-byte.cpp index 63f41e6f1e16a..00191b2711a34 100644 --- a/clang/test/CodeGenCXX/std-byte.cpp +++ b/clang/test/CodeGenCXX/std-byte.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -std=c++1z -Werror -triple i386-unknown-unknown -emit-llvm -O1 -disable-llvm-passes -o - %s | FileCheck %s // std::byte should be considered equivalent to char for aliasing. 
@@ -8,10 +9,10 @@ enum byte : unsigned char {}; // CHECK-LABEL: define{{.*}} void @test0( extern "C" void test0(std::byte *sb, int *i) { - // CHECK: store i8 0, ptr %{{.*}} !tbaa [[TAG_CHAR:!.*]] + // CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TBAA11:![0-9]+]] *sb = std::byte{0}; - // CHECK: store i32 1, ptr %{{.*}} !tbaa [[TAG_INT:!.*]] + // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TBAA3:![0-9]+]] *i = 1; } @@ -27,15 +28,24 @@ enum byte : unsigned char {}; // CHECK-LABEL: define{{.*}} void @test1( extern "C" void test1(::byte *b, ::my::byte *mb, ::my::std::byte *msb) { + // CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TBAA12:![0-9]+]] *b = ::byte{0}; + // CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TBAA14:![0-9]+]] *mb = ::my::byte{0}; + // CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TBAA16:![0-9]+]] *msb = ::my::std::byte{0}; - // CHECK-NOT: store i8 0, ptr %{{.*}} !tbaa [[TAG_CHAR]] } -// CHECK: !"any pointer", [[TYPE_CHAR:!.*]], -// CHECK: [[TYPE_CHAR]] = !{!"omnipotent char", [[TAG_CXX_TBAA:!.*]], -// CHECK: [[TAG_CXX_TBAA]] = !{!"Simple C++ TBAA"} -// CHECK: [[TAG_CHAR]] = !{[[TYPE_CHAR:!.*]], [[TYPE_CHAR]], i64 0} -// CHECK: [[TAG_INT]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0} -// CHECK: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]] +//. +// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C++ TBAA"} +// CHECK: [[TBAA11]] = !{[[META5]], [[META5]], i64 0} +// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"_ZTS4byte", [[META5]], i64 0} +// CHECK: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// CHECK: [[META15]] = !{!"_ZTSN2my4byteE", [[META5]], i64 0} +// CHECK: [[TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// CHECK: [[META17]] = !{!"_ZTSN2my3std4byteE", [[META5]], i64 0} +//. 
diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl index b55f663d6d948..05b7a9b40b02e 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl @@ -27,7 +27,7 @@ __amdgpu_buffer_rsrc_t getBuffer(void *p) { // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(5) [[P]], addrspacecast (ptr null to ptr addrspace(5)) // CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] // CHECK: [[IF_THEN]]: -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA8:![0-9]+]] // CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP0]]) #[[ATTR2]] // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: @@ -41,14 +41,14 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { // CHECK-LABEL: define dso_local void @test( // CHECK-SAME: ptr addrspace(5) noundef readonly captures(address) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[INT_TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[INT_TBAA10:![0-9]+]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5)) // CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]] // CHECK-NEXT: br i1 [[OR_COND]], label %[[IF_END:.*]], label %[[IF_THEN_I:.*]] // CHECK: [[IF_THEN_I]]: // CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa 
[[__AMDGPU_BUFFER_RSRC_T_TBAA4]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA8]] // CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP1]]) #[[ATTR2]] // CHECK-NEXT: br label %[[IF_END]] // CHECK: [[IF_END]]: @@ -81,11 +81,11 @@ AA bar(void *p) { return a; } //. -// CHECK: [[__AMDGPU_BUFFER_RSRC_T_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK: [[META5]] = !{!"__amdgpu_buffer_rsrc_t", [[META6:![0-9]+]], i64 0} +// CHECK: [[META5:![0-9]+]] = !{!"int", [[META6:![0-9]+]], i64 0} // CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} // CHECK: [[META7]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[INT_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0} -// CHECK: [[META9]] = !{!"AA_ty", [[META10]], i64 0, [[META5]], i64 16} -// CHECK: [[META10]] = !{!"int", [[META6]], i64 0} +// CHECK: [[__AMDGPU_BUFFER_RSRC_T_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[META9]] = !{!"__amdgpu_buffer_rsrc_t", [[META6]], i64 0} +// CHECK: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META5]], i64 0} +// CHECK: [[META11]] = !{!"AA_ty", [[META5]], i64 0, [[META9]], i64 16} //. 
diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl index be822a6e55f58..8c3e5b70ea308 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -1,28 +1,28 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 6 // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1250 -disable-llvm-passes -fno-ident -emit-llvm %s -o - | FileCheck %s kernel void foo(global int *p) { *p = 1; } // CHECK: Function Attrs: convergent norecurse nounwind // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr -// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa 
[[INTPTR_TBAA11:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA11]] // CHECK-NEXT: call void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK: Function Attrs: alwaysinline convergent norecurse nounwind // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_foo( -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr -// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA11]] +// CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] // CHECK-NEXT: ret void // //. 
@@ -33,15 +33,15 @@ kernel void foo(global int *p) { *p = 1; } // CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META2:![0-9]+]] = !{i32 2, i32 0} -// CHECK: [[META3]] = !{i32 1} -// CHECK: [[META4]] = !{!"none"} -// CHECK: [[META5]] = !{!"int*"} -// CHECK: [[META6]] = !{!""} -// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// CHECK: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0} -// CHECK: [[META9]] = !{!"any pointer", [[META10:![0-9]+]], i64 0} -// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} -// CHECK: [[META11]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} -// CHECK: [[META13]] = !{!"int", [[META10]], i64 0} +// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META7]] = !{i32 1} +// CHECK: [[META8]] = !{!"none"} +// CHECK: [[META9]] = !{!"int*"} +// CHECK: [[META10]] = !{!""} +// CHECK: [[INTPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"p1 int", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"any pointer", [[META5]], i64 0} //. 
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 6d573238440d2..e9adac23a6530 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -451,19 +451,19 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr // GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3:![0-9]+]] -// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7:![0-9]+]] -// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9:![0-9]+]] +// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] -// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: ret void // // // GFX900: Function 
Attrs: convergent norecurse nounwind // GFX900-LABEL: define dso_local amdgpu_kernel void @test( -// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META13:![0-9]+]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META15:![0-9]+]] { // GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) @@ -473,21 +473,21 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr // GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr // GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14:![0-9]+]] -// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16:![0-9]+]] -// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] -// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] -// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] -// GFX900-NEXT: [[TMP2:%.*]] = load ptr 
addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16:![0-9]+]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA18:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16]] +// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA18]] +// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]] // GFX900-NEXT: ret void // // // GFX900: Function Attrs: alwaysinline convergent norecurse nounwind // GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test( -// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META13]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] 
!kernel_arg_type_qual [[META15]] { // GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) @@ -519,16 +519,16 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr // GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr // GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] -// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] -// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA18]] +// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] -// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17:![0-9]+]] +// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] // GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19:![0-9]+]] -// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) 
[[FLAGS]], align 4, !tbaa [[INT_TBAA17]] +// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] // GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 @@ -537,14 +537,14 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[CHARPTR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] -// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA18]] +// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]]) // GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] -// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] +// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 @@ -553,20 +553,20 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[CHARPTR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6 
-// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] -// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA18]] +// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[LONGPTR_TBAA9]] // GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5 -// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] -// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] +// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]]) // GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] -// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa 
[[INT_TBAA17]] +// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 @@ -575,17 +575,17 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[CHARPTR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6 -// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] -// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA18]] +// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa 
[[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[LONGPTR_TBAA9]] // GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 -// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] -// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] +// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] // GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 // GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8 @@ -599,16 +599,16 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa 
[[LONG_TBAA3]] -// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA7]] +// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[LONGPTR_TBAA7]] -// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[LONGPTR_TBAA9]] +// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] -// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] +// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] -// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) 
@__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] @@ -643,11 +643,11 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] -// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] +// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] -// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] +// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) // GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] @@ -664,11 +664,11 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA16]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 -// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: ret void // // @@ -691,17 +691,17 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, 
ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA16]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 -// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA9]] // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 -// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: ret void // // @@ -727,20 +727,20 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, 
!tbaa [[ANYPTR_TBAA32:![0-9]+]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA16]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 -// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA18]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA9]] // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) 
[[TMP3]], i64 0 -// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[ANYPTR_TBAA32]] // GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 -// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[INT_TBAA17]] +// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[INT_TBAA3]] // GFX900-NEXT: ret void // // @@ -763,9 +763,9 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[LONG_TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[LONGPTR_TBAA9]] // GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]] // GFX900-NEXT: ret void // @@ -852,36 +852,36 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // GFX900: 
[[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0} -// GFX900: [[LONG_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} -// GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} +// GFX900: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// GFX900: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // GFX900: [[META6]] = !{!"Simple C/C++ TBAA"} -// GFX900: [[LONGPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// GFX900: [[META8]] = !{!"p1 long", [[META9:![0-9]+]], i64 0} -// GFX900: [[META9]] = !{!"any pointer", [[META5]], i64 0} -// GFX900: [[META10]] = !{i32 1, i32 0, i32 1, i32 0} -// GFX900: [[META11]] = !{!"none", !"none", !"none", !"none"} -// GFX900: [[META12]] = !{!"char*", !"char", !"long*", !"long"} -// GFX900: [[META13]] = !{!"", !"", !"", !""} -// GFX900: [[CHARPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} -// GFX900: [[META15]] = !{!"p1 omnipotent char", [[META9]], i64 0} -// GFX900: [[CHAR_TBAA16]] = !{[[META5]], [[META5]], i64 0} -// GFX900: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// GFX900: [[META18]] = !{!"int", [[META5]], i64 0} +// GFX900: [[LONG_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// GFX900: [[META8]] = !{!"long", [[META5]], i64 0} +// GFX900: [[LONGPTR_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// GFX900: [[META10]] = !{!"p1 long", [[META11:![0-9]+]], i64 0} +// GFX900: [[META11]] = !{!"any pointer", [[META5]], i64 0} +// GFX900: [[META12]] = !{i32 1, i32 0, i32 1, i32 0} +// GFX900: [[META13]] = !{!"none", !"none", !"none", !"none"} +// GFX900: [[META14]] = !{!"char*", !"char", !"long*", !"long"} +// GFX900: [[META15]] = !{!"", !"", !"", !""} +// GFX900: [[CHARPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// GFX900: [[META17]] = !{!"p1 omnipotent char", [[META11]], i64 0} +// GFX900: [[CHAR_TBAA18]] = !{[[META5]], [[META5]], i64 0} // GFX900: 
[[QUEUE_T_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} // GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0} -// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[INT_TBAA17]]} +// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[INT_TBAA3]]} // GFX900: [[META22]] = !{i32 1} // GFX900: [[META23]] = !{!"none"} // GFX900: [[META24]] = !{!"int*"} // GFX900: [[META25]] = !{!""} // GFX900: [[INTPTR_TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -// GFX900: [[META27]] = !{!"p1 int", [[META9]], i64 0} +// GFX900: [[META27]] = !{!"p1 int", [[META11]], i64 0} // GFX900: [[META28]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle} // GFX900: [[META29]] = !{i32 0} // GFX900: [[META30]] = !{!"__block_literal"} // GFX900: [[META31]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle} -// GFX900: [[ANYPTR_TBAA32]] = !{[[META9]], [[META9]], i64 0} +// GFX900: [[ANYPTR_TBAA32]] = !{[[META11]], [[META11]], i64 0} // GFX900: [[META33]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle} // GFX900: [[META34]] = !{i32 0, i32 3} // GFX900: [[META35]] = !{!"none", !"none"} diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl index cea7ee576d822..829f672a6ccc9 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-printf.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -16,71 +16,71 @@ __kernel void test_printf_str_int(int i) { printf("%s:%d", s, i); } // CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_noargs( -// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] { +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: call void 
@__clang_ocl_kern_imp_test_printf_noargs() #[[ATTR5:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_noargs( -// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] { +// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_int( -// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5:![0-9]+]] !kernel_arg_access_qual [[META6:![0-9]+]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8:![0-9]+]] { +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_int(i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_int( -// 
CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_str_int( -// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) 
[[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_str_int(i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_str_int( -// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[S]]) #[[ATTR6:[0-9]+]] // CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false) // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[S]]) #[[ATTR6]] // CHECK-NEXT: ret void // //. 
-// CHECK: [[META4]] = !{} -// CHECK: [[META5]] = !{i32 0} -// CHECK: [[META6]] = !{!"none"} -// CHECK: [[META7]] = !{!"int"} -// CHECK: [[META8]] = !{!""} -// CHECK: [[INT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// CHECK: [[META10]] = !{!"int", [[META11:![0-9]+]], i64 0} -// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} -// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META8]] = !{} +// CHECK: [[META9]] = !{i32 0} +// CHECK: [[META10]] = !{!"none"} +// CHECK: [[META11]] = !{!"int"} +// CHECK: [[META12]] = !{!""} //. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl index 321835cc3d28d..6326866ed3c35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl @@ -18,7 +18,7 @@ typedef int v8i __attribute__((ext_vector_type(8))); // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c) @@ -34,7 +34,7 @@ void 
test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c) @@ -50,7 +50,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x half> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c) @@ -66,7 +66,7 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x i16> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s c) @@ -82,7 +82,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c) @@ -98,7 +98,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <8 x i32> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x 
i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c) @@ -110,7 +110,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -122,7 +122,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global 
v8f* out, v2i a, v2i b, v8f c) @@ -134,7 +134,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -146,7 +146,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -158,7 +158,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef 
[[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) @@ -166,7 +166,7 @@ void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) *out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12(true, a, true, b, c, false); } //. -// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} //. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl index 8b5b31537ce58..a79c3d4da1ebb 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl @@ -17,7 +17,7 @@ typedef int v4i __attribute__((ext_vector_type(4))); // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c) @@ -33,7 +33,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* 
out, v4s a, v4s b, v4f c) @@ -49,7 +49,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x half> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c) @@ -65,7 +65,7 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x i16> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s c) @@ -81,7 +81,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], 
<4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c) @@ -97,7 +97,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c) @@ -109,7 +109,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c) // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <4 x float> 
[[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4f c) @@ -121,7 +121,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4f c) @@ -133,7 +133,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4f c) @@ -145,7 +145,7 @@ 
void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4f c) @@ -157,7 +157,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v4i* out, int a, int b, v4i c) @@ -165,7 +165,7 @@ void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v4i* out, int a, int b, v4i c) *out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12(true, a, true, b, c, false); } //. 
-// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} //. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl index e03ae66f92035..22004627b561f 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl @@ -152,7 +152,7 @@ void test_amdgcn_ds_atomic_async_barrier_arrive_b64(local long* addr) // CHECK-GFX1250-SAME: ptr addrspace(3) noundef captures(none) [[ADDR:%.*]], i64 noundef [[DATA:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { // CHECK-GFX1250-NEXT: [[ENTRY:.*:]] // CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) [[ADDR]], i64 [[DATA]]) -// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT]], align 8, !tbaa [[LONG_TBAA4:![0-9]+]] +// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT]], align 8, !tbaa [[LONG_TBAA8:![0-9]+]] // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(local long* addr, long data, long *out) @@ -160,8 +160,8 @@ void test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(local long* addr, long data, l *out = __builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64(addr, data); } //. 
-// CHECK-GFX1250: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1250: [[META5]] = !{!"long", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1250: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1250: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} // CHECK-GFX1250: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1250: [[LONG_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK-GFX1250: [[META9]] = !{!"long", [[META6]], i64 0} //. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl index b3367202f824e..a02c97b115b5a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl @@ -13,7 +13,7 @@ typedef unsigned int uint; // CHECK-LABEL: define dso_local amdgpu_kernel void @test_builtins_amdgcn_gws_insts -// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.init(i32 [[A]], i32 [[B]]) // CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.barrier(i32 [[A]], i32 [[B]]) diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl index 214390142b6aa..72ba1915fa01c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl @@ -17,7 +17,7 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) 
initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A]], <16 x half> [[B]], <8 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index) @@ -29,7 +29,7 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A]], <16 x i16> [[B]], <8 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index) @@ -41,7 +41,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> 
noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index) @@ -53,7 +53,7 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index) @@ -65,7 +65,7 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = 
tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index) @@ -77,7 +77,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int index) @@ -89,7 +89,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A]], i1 true, <4 x i32> [[B]], 
<8 x i32> [[C]], i32 [[INDEX]], i1 true) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index) @@ -101,7 +101,7 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -113,7 +113,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa 
[[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -125,7 +125,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -137,7 +137,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // 
CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -145,7 +145,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index); } //. -// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} //. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl index 47753afd1aa52..015c493c66d48 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl @@ -16,7 +16,7 @@ typedef short v8s __attribute__((ext_vector_type(8))); // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A]], <8 x half> [[B]], <4 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f c, int index) @@ -28,7 +28,7 @@ void 
test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A]], <8 x i16> [[B]], <4 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, int index) @@ -40,7 +40,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A]], <8 x half> [[B]], <4 x half> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, int index) @@ -52,7 +52,7 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) 
initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A]], <8 x i16> [[B]], <4 x i16> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, int index) @@ -64,7 +64,7 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4 // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i c, int index) @@ -76,7 +76,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, int index) @@ -88,7 +88,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, int index) @@ -100,7 +100,7 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> 
@llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -112,7 +112,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -124,7 +124,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x 
float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -136,7 +136,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, // CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1200-NEXT: [[ENTRY:.*:]] // CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -144,7 +144,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(a, b, c, index); } //. -// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1200: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} //. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl index 853cd32f8bdce..2f3ab1148912a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl @@ -21,7 +21,7 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x float> [[C]]) -// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v16h a, v16h b, v8f c) @@ -37,7 +37,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v16h a, v16h b, v8f // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x float> [[C]]) -// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v16s a, 
v16s b, v8f c) @@ -53,7 +53,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v16s a, v16s b, v8f // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <16 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <16 x half> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v16h* out, v16h a, v16h b, v16h c) @@ -69,7 +69,7 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v16h* out, v16h a, v16h b, v16 // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <16 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <16 x i16> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v16s* out, v16s a, v16s b, v16s c) @@ -85,7 +85,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v16s* out, v16s a, v16s b, v // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef 
[[A:%.*]], <16 x half> noundef [[B:%.*]], <16 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v16f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <16 x half> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(global v16h* out, v16h a, v16h b, v16h c) @@ -101,7 +101,7 @@ void test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(global v16h* out, v16h a, v16h b // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <16 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v16i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <16 x i16> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(global v16s* out, v16s a, v16s b, v16s c) @@ -117,7 +117,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(global v16s* out, v16s a, v16s // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 true, <4 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i1 false) -// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v4i a, v4i b, v8i c) @@ -133,7 +133,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v4i a, v4i b, v8i c) // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false) -// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) @@ -143,7 +143,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) #endif //. -// CHECK-GFX1100: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1100: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1100: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1100: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1100: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1100: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} //. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl index 9b6872f6b1e6d..024a9287c071b 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl @@ -22,7 +22,7 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> [[A]], <16 x half> [[B]], <4 x float> [[C]]) -// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v16h a, v16h b, v4f c) @@ -38,7 +38,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v16h a, v16h b, v4f // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <4 x float> [[C]]) -// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v16s a, 
v16s b, v4f c) @@ -54,7 +54,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v16s a, v16s b, v4f // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v8h* out, v16h a, v16h b, v8h c) @@ -70,7 +70,7 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v8h* out, v16h a, v16h b, v8h // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v8s* out, v16s a, v16s b, v8s c) @@ -86,7 +86,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v8s* out, v16s a, v16s b, v8 // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x 
half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v8f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(global v8h* out, v16h a, v16h b, v8h c) @@ -102,7 +102,7 @@ void test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(global v8h* out, v16h a, v16h b, // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v8i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(global v8s* out, v16s a, v16s b, v8s c) @@ -118,7 +118,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(global v8s* out, v16s a, v16s // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 true, <4 
x i32> [[A]], i1 true, <4 x i32> [[B]], <4 x i32> [[C]], i1 false) -// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, v4i a, v4i b, v4i c) @@ -134,7 +134,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, v4i a, v4i b, v4i c) // CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-GFX1100-NEXT: [[ENTRY:.*:]] // CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i1 false) -// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] +// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, v2i a, v2i b, v4i c) @@ -144,7 +144,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, v2i a, v2i b, v4i c) #endif //. -// CHECK-GFX1100: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// CHECK-GFX1100: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// CHECK-GFX1100: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1100: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1100: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK-GFX1100: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} //. 
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl index 6e5c1c49504ec..e76aa81f918cb 100644 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl @@ -9,11 +9,11 @@ typedef float float3 __attribute__((ext_vector_type(3))); typedef float float4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local spir_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META10:![0-9]+]] !kernel_arg_type_qual [[META11:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 // CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12:![0-9]+]] // CHECK-NEXT: ret void // void kernel foo(global float3 *a, global float3 *b) { @@ -21,11 +21,11 @@ void kernel foo(global float3 *a, global float3 *b) { } // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 
captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] { +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META11]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: ret void // void kernel float4_to_float3(global float3 *a, global float4 *b) { @@ -33,11 +33,11 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) { } // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4( -// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] { +// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef 
writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META11]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 // CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: ret void // void kernel float3_to_float4(global float3 *a, global float4 *b) { @@ -45,11 +45,11 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) { } // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2( -// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] { +// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META11]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa 
[[CHAR_TBAA8]] +// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: ret void // void kernel float3_to_double2(global float3 *a, global double2 *b) { @@ -57,11 +57,11 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) { } // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] { +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META18:![0-9]+]] !kernel_arg_type_qual [[META11]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: ret void // void kernel char8_to_short3(global short3 *a, global char8 *b) { @@ -72,7 +72,7 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) { // CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr 
#[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> -// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA17:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] // CHECK-NEXT: ret void // void from_char3(char3 a, global int *out) { @@ -95,7 +95,7 @@ void from_short3(short3 a, global long *out) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8> // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> -// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: ret void // void scalar_to_char3(int a, global char3 *out) { @@ -107,7 +107,7 @@ void scalar_to_char3(int a, global char3 *out) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16> // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA12]] // CHECK-NEXT: ret void // void scalar_to_short3(long a, global short3 *out) { @@ -115,22 +115,22 @@ void scalar_to_short3(long a, global short3 *out) { } //. 
-// CHECK: [[META3]] = !{i32 1, i32 1} -// CHECK: [[META4]] = !{!"none", !"none"} -// CHECK: [[META5]] = !{!"float3*", !"float3*"} -// CHECK: [[META6]] = !{!"float __attribute__((ext_vector_type(3)))*", !"float __attribute__((ext_vector_type(3)))*"} -// CHECK: [[META7]] = !{!"", !""} -// CHECK: [[CHAR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} -// CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0} -// CHECK: [[META10]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[META11]] = !{!"float3*", !"float4*"} -// CHECK: [[META12]] = !{!"float __attribute__((ext_vector_type(3)))*", !"float __attribute__((ext_vector_type(4)))*"} -// CHECK: [[META13]] = !{!"float3*", !"double2*"} -// CHECK: [[META14]] = !{!"float __attribute__((ext_vector_type(3)))*", !"double __attribute__((ext_vector_type(2)))*"} -// CHECK: [[META15]] = !{!"short3*", !"char8*"} -// CHECK: [[META16]] = !{!"short __attribute__((ext_vector_type(3)))*", !"char __attribute__((ext_vector_type(8)))*"} -// CHECK: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// CHECK: [[META18]] = !{!"int", [[META9]], i64 0} +// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META7]] = !{i32 1, i32 1} +// CHECK: [[META8]] = !{!"none", !"none"} +// CHECK: [[META9]] = !{!"float3*", !"float3*"} +// CHECK: [[META10]] = !{!"float __attribute__((ext_vector_type(3)))*", !"float __attribute__((ext_vector_type(3)))*"} +// CHECK: [[META11]] = !{!"", !""} +// CHECK: [[CHAR_TBAA12]] = !{[[META5]], [[META5]], i64 0} +// CHECK: [[META13]] = !{!"float3*", !"float4*"} +// CHECK: [[META14]] = !{!"float __attribute__((ext_vector_type(3)))*", !"float __attribute__((ext_vector_type(4)))*"} +// CHECK: [[META15]] = !{!"float3*", !"double2*"} +// CHECK: [[META16]] = !{!"float __attribute__((ext_vector_type(3)))*", !"double 
__attribute__((ext_vector_type(2)))*"} +// CHECK: [[META17]] = !{!"short3*", !"char8*"} +// CHECK: [[META18]] = !{!"short __attribute__((ext_vector_type(3)))*", !"char __attribute__((ext_vector_type(8)))*"} // CHECK: [[LONG_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} -// CHECK: [[META20]] = !{!"long", [[META9]], i64 0} +// CHECK: [[META20]] = !{!"long", [[META5]], i64 0} //. diff --git a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp index e932e75d025e0..07f5929756fcf 100644 --- a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp +++ b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp @@ -2,11 +2,11 @@ //RUN: %clang_cc1 %s -triple spir -emit-llvm -O1 -o - | FileCheck %s // CHECK-LABEL: define dso_local spir_kernel void @test( -// CHECK-SAME: ptr addrspace(1) noundef readonly align 8 captures(none) [[IN:%.*]], ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// CHECK-SAME: ptr addrspace(1) noundef readonly align 8 captures(none) [[IN:%.*]], ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[IN]], i32 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1_I]], align 8, !tbaa [[LONG_TBAA8:![0-9]+]] -// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA8]] +// CHECK-NEXT: 
[[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1_I]], align 8, !tbaa [[LONG_TBAA12:![0-9]+]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA12]] // CHECK-NEXT: ret void // __kernel void test(__global long *In, __global long *Out) { @@ -14,12 +14,12 @@ __kernel void test(__global long *In, __global long *Out) { *Out = m[1]; } //. -// CHECK: [[META4]] = !{i32 1, i32 1} -// CHECK: [[META5]] = !{!"none", !"none"} -// CHECK: [[META6]] = !{!"long*", !"long*"} -// CHECK: [[META7]] = !{!"", !""} -// CHECK: [[LONG_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} -// CHECK: [[META9]] = !{!"long", [[META10:![0-9]+]], i64 0} -// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} -// CHECK: [[META11]] = !{!"Simple C++ TBAA"} +// CHECK: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK: [[META7]] = !{!"Simple C++ TBAA"} +// CHECK: [[META8]] = !{i32 1, i32 1} +// CHECK: [[META9]] = !{!"none", !"none"} +// CHECK: [[META10]] = !{!"long*", !"long*"} +// CHECK: [[META11]] = !{!"", !""} +// CHECK: [[LONG_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"long", [[META6]], i64 0} //. 
diff --git a/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c b/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c index 6ca17e1f9f285..d50e0befcee79 100644 --- a/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c +++ b/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c @@ -12,34 +12,34 @@ unsigned short si, sj, sk; // CHECKS-LABEL: define dso_local void @testshortmul( -// CHECKS-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { +// CHECKS-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG17:![0-9]+]] { // CHECKS-NEXT: [[ENTRY:.*:]] -// CHECKS-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[SHORT_TBAA17:![0-9]+]] -// CHECKS-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32, !dbg [[DBG16]] -// CHECKS-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[SHORT_TBAA17]] -// CHECKS-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32, !dbg [[DBG21]] -// CHECKS-NEXT: [[TMP2:%.*]] = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[CONV]], i32 [[CONV1]]), !dbg [[DBG22:![0-9]+]], !nosanitize [[META26:![0-9]+]] -// CHECKS-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !dbg [[DBG22]], !nosanitize [[META26]] -// CHECKS-NEXT: br i1 [[TMP3]], label %[[HANDLER_MUL_OVERFLOW:.*]], label %[[CONT:.*]], !dbg [[DBG22]], !prof [[PROF27:![0-9]+]], !nosanitize [[META26]] +// CHECKS-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG20:![0-9]+]], !tbaa [[SHORT_TBAA21:![0-9]+]] +// CHECKS-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32, !dbg [[DBG20]] +// CHECKS-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[SHORT_TBAA21]] +// CHECKS-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32, !dbg [[DBG23]] +// CHECKS-NEXT: [[TMP2:%.*]] = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[CONV]], i32 [[CONV1]]), !dbg [[DBG24:![0-9]+]], !nosanitize [[META28:![0-9]+]] +// CHECKS-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } 
[[TMP2]], 1, !dbg [[DBG24]], !nosanitize [[META28]] +// CHECKS-NEXT: br i1 [[TMP3]], label %[[HANDLER_MUL_OVERFLOW:.*]], label %[[CONT:.*]], !dbg [[DBG24]], !prof [[PROF29:![0-9]+]], !nosanitize [[META28]] // CHECKS: [[HANDLER_MUL_OVERFLOW]]: -// CHECKS-NEXT: [[TMP4:%.*]] = zext i16 [[TMP0]] to i64, !dbg [[DBG22]] -// CHECKS-NEXT: [[TMP5:%.*]] = zext i16 [[TMP1]] to i64, !dbg [[DBG22]] -// CHECKS-NEXT: tail call void @__ubsan_handle_mul_overflow_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[TMP4]], i64 [[TMP5]]) #[[ATTR3:[0-9]+]], !dbg [[DBG22]], !nosanitize [[META26]] -// CHECKS-NEXT: unreachable, !dbg [[DBG22]], !nosanitize [[META26]] +// CHECKS-NEXT: [[TMP4:%.*]] = zext i16 [[TMP0]] to i64, !dbg [[DBG24]] +// CHECKS-NEXT: [[TMP5:%.*]] = zext i16 [[TMP1]] to i64, !dbg [[DBG24]] +// CHECKS-NEXT: tail call void @__ubsan_handle_mul_overflow_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[TMP4]], i64 [[TMP5]]) #[[ATTR3:[0-9]+]], !dbg [[DBG24]], !nosanitize [[META28]] +// CHECKS-NEXT: unreachable, !dbg [[DBG24]], !nosanitize [[META28]] // CHECKS: [[CONT]]: -// CHECKS-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !dbg [[DBG22]], !nosanitize [[META26]] -// CHECKS-NEXT: [[CONV2:%.*]] = trunc i32 [[TMP6]] to i16, !dbg [[DBG16]] -// CHECKS-NEXT: store i16 [[CONV2]], ptr @si, align 2, !dbg [[DBG28:![0-9]+]], !tbaa [[SHORT_TBAA17]] -// CHECKS-NEXT: ret void, !dbg [[DBG29:![0-9]+]] +// CHECKS-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !dbg [[DBG24]], !nosanitize [[META28]] +// CHECKS-NEXT: [[CONV2:%.*]] = trunc i32 [[TMP6]] to i16, !dbg [[DBG20]] +// CHECKS-NEXT: store i16 [[CONV2]], ptr @si, align 2, !dbg [[DBG30:![0-9]+]], !tbaa [[SHORT_TBAA21]] +// CHECKS-NEXT: ret void, !dbg [[DBG31:![0-9]+]] // // CHECKU-LABEL: define dso_local void @testshortmul( -// CHECKU-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { +// CHECKU-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG17:![0-9]+]] { // CHECKU-NEXT: [[ENTRY:.*:]] -// 
CHECKU-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[SHORT_TBAA17:![0-9]+]] -// CHECKU-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[SHORT_TBAA17]] -// CHECKU-NEXT: [[MUL:%.*]] = mul i16 [[TMP1]], [[TMP0]], !dbg [[DBG22:![0-9]+]] -// CHECKU-NEXT: store i16 [[MUL]], ptr @si, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[SHORT_TBAA17]] -// CHECKU-NEXT: ret void, !dbg [[DBG24:![0-9]+]] +// CHECKU-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG20:![0-9]+]], !tbaa [[SHORT_TBAA21:![0-9]+]] +// CHECKU-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[SHORT_TBAA21]] +// CHECKU-NEXT: [[MUL:%.*]] = mul i16 [[TMP1]], [[TMP0]], !dbg [[DBG24:![0-9]+]] +// CHECKU-NEXT: store i16 [[MUL]], ptr @si, align 2, !dbg [[DBG25:![0-9]+]], !tbaa [[SHORT_TBAA21]] +// CHECKU-NEXT: ret void, !dbg [[DBG26:![0-9]+]] // void testshortmul(void) { @@ -58,23 +58,23 @@ void testshortmul(void) { // CHECKS: [[META8]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) // CHECKS: [[META9]] = !DIGlobalVariableExpression(var: [[META10:![0-9]+]], expr: !DIExpression()) // CHECKS: [[META10]] = distinct !DIGlobalVariable(name: "sk", scope: [[META2]], file: [[META7]], line: 12, type: [[META8]], isLocal: false, isDefinition: true) -// CHECKS: [[DBG13]] = distinct !DISubprogram(name: "testshortmul", scope: [[META7]], file: [[META7]], line: 44, type: [[META14:![0-9]+]], scopeLine: 44, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META2]]) -// CHECKS: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]]) -// CHECKS: [[META15]] = !{null} -// CHECKS: [[DBG16]] = !DILocation(line: 47, column: 8, scope: [[DBG13]]) -// CHECKS: [[SHORT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// CHECKS: [[META18]] = !{!"short", [[META19:![0-9]+]], i64 0} -// CHECKS: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0} -// CHECKS: [[META20]] = 
!{!"Simple C/C++ TBAA"} -// CHECKS: [[DBG21]] = !DILocation(line: 47, column: 13, scope: [[DBG13]]) -// CHECKS: [[DBG22]] = !DILocation(line: 0, scope: [[META23:![0-9]+]], inlinedAt: [[META25:![0-9]+]]) -// CHECKS: [[META23]] = distinct !DISubprogram(name: "__ubsan_check_mul_overflow", scope: [[META7]], file: [[META7]], type: [[META24:![0-9]+]], flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: [[META2]]) -// CHECKS: [[META24]] = !DISubroutineType(types: null) -// CHECKS: [[META25]] = !DILocation(line: 47, column: 11, scope: [[DBG13]]) -// CHECKS: [[META26]] = !{} -// CHECKS: [[PROF27]] = !{!"branch_weights", i32 1, i32 1048575} -// CHECKS: [[DBG28]] = !DILocation(line: 47, column: 6, scope: [[DBG13]]) -// CHECKS: [[DBG29]] = !DILocation(line: 48, column: 1, scope: [[DBG13]]) +// CHECKS: [[META15:![0-9]+]] = !{!"omnipotent char", [[META16:![0-9]+]], i64 0} +// CHECKS: [[META16]] = !{!"Simple C/C++ TBAA"} +// CHECKS: [[DBG17]] = distinct !DISubprogram(name: "testshortmul", scope: [[META7]], file: [[META7]], line: 44, type: [[META18:![0-9]+]], scopeLine: 44, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META2]]) +// CHECKS: [[META18]] = !DISubroutineType(types: [[META19:![0-9]+]]) +// CHECKS: [[META19]] = !{null} +// CHECKS: [[DBG20]] = !DILocation(line: 47, column: 8, scope: [[DBG17]]) +// CHECKS: [[SHORT_TBAA21]] = !{[[META22:![0-9]+]], [[META22]], i64 0} +// CHECKS: [[META22]] = !{!"short", [[META15]], i64 0} +// CHECKS: [[DBG23]] = !DILocation(line: 47, column: 13, scope: [[DBG17]]) +// CHECKS: [[DBG24]] = !DILocation(line: 0, scope: [[META25:![0-9]+]], inlinedAt: [[META27:![0-9]+]]) +// CHECKS: [[META25]] = distinct !DISubprogram(name: "__ubsan_check_mul_overflow", scope: [[META7]], file: [[META7]], type: [[META26:![0-9]+]], flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: [[META2]]) +// CHECKS: [[META26]] = !DISubroutineType(types: null) +// CHECKS: [[META27]] = !DILocation(line: 47, column: 11, scope: 
[[DBG17]]) +// CHECKS: [[META28]] = !{} +// CHECKS: [[PROF29]] = !{!"branch_weights", i32 1, i32 1048575} +// CHECKS: [[DBG30]] = !DILocation(line: 47, column: 6, scope: [[DBG17]]) +// CHECKS: [[DBG31]] = !DILocation(line: 48, column: 1, scope: [[DBG17]]) //. // CHECKU: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression()) // CHECKU: [[META1]] = distinct !DIGlobalVariable(name: "sj", scope: [[META2:![0-9]+]], file: [[META7:![0-9]+]], line: 12, type: [[META8:![0-9]+]], isLocal: false, isDefinition: true) @@ -87,16 +87,16 @@ void testshortmul(void) { // CHECKU: [[META8]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) // CHECKU: [[META9]] = !DIGlobalVariableExpression(var: [[META10:![0-9]+]], expr: !DIExpression()) // CHECKU: [[META10]] = distinct !DIGlobalVariable(name: "sk", scope: [[META2]], file: [[META7]], line: 12, type: [[META8]], isLocal: false, isDefinition: true) -// CHECKU: [[DBG13]] = distinct !DISubprogram(name: "testshortmul", scope: [[META7]], file: [[META7]], line: 44, type: [[META14:![0-9]+]], scopeLine: 44, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META2]]) -// CHECKU: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]]) -// CHECKU: [[META15]] = !{null} -// CHECKU: [[DBG16]] = !DILocation(line: 47, column: 8, scope: [[DBG13]]) -// CHECKU: [[SHORT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// CHECKU: [[META18]] = !{!"short", [[META19:![0-9]+]], i64 0} -// CHECKU: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0} -// CHECKU: [[META20]] = !{!"Simple C/C++ TBAA"} -// CHECKU: [[DBG21]] = !DILocation(line: 47, column: 13, scope: [[DBG13]]) -// CHECKU: [[DBG22]] = !DILocation(line: 47, column: 11, scope: [[DBG13]]) -// CHECKU: [[DBG23]] = !DILocation(line: 47, column: 6, scope: [[DBG13]]) -// CHECKU: [[DBG24]] = !DILocation(line: 48, column: 1, scope: [[DBG13]]) +// CHECKU: [[META15:![0-9]+]] = !{!"omnipotent char", 
[[META16:![0-9]+]], i64 0} +// CHECKU: [[META16]] = !{!"Simple C/C++ TBAA"} +// CHECKU: [[DBG17]] = distinct !DISubprogram(name: "testshortmul", scope: [[META7]], file: [[META7]], line: 44, type: [[META18:![0-9]+]], scopeLine: 44, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META2]]) +// CHECKU: [[META18]] = !DISubroutineType(types: [[META19:![0-9]+]]) +// CHECKU: [[META19]] = !{null} +// CHECKU: [[DBG20]] = !DILocation(line: 47, column: 8, scope: [[DBG17]]) +// CHECKU: [[SHORT_TBAA21]] = !{[[META22:![0-9]+]], [[META22]], i64 0} +// CHECKU: [[META22]] = !{!"short", [[META15]], i64 0} +// CHECKU: [[DBG23]] = !DILocation(line: 47, column: 13, scope: [[DBG17]]) +// CHECKU: [[DBG24]] = !DILocation(line: 47, column: 11, scope: [[DBG17]]) +// CHECKU: [[DBG25]] = !DILocation(line: 47, column: 6, scope: [[DBG17]]) +// CHECKU: [[DBG26]] = !DILocation(line: 48, column: 1, scope: [[DBG17]]) //. diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index b88aa3cc18207..7e2691633c215 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -50,7 +50,7 @@ typedef unsigned long long uint64_t; // CHECK-LABEL: define dso_local i64 @test___make_mantissa_base8( // CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] // CHECK: [[WHILE_BODY_I]]: @@ -66,9 +66,9 @@ typedef unsigned long long uint64_t; // CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 // CHECK-NEXT: [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]] // CHECK-NEXT: [[INCDEC_PTR_I]] = 
getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1 -// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK: [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]: // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[WHILE_BODY_I]] ], [ [[SUB_I]], %[[IF_THEN_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] @@ -80,7 +80,7 @@ typedef unsigned long long uint64_t; // AMDGCNSPIRV: [[WHILE_COND_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P]], %[[ENTRY]] ], [ [[__TAGP_ADDR_1_I:%.*]], %[[WHILE_BODY_I:.*]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[__R_1_I:%.*]], %[[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA5:![0-9]+]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA9:![0-9]+]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:.*]], label %[[WHILE_BODY_I]] // AMDGCNSPIRV: [[WHILE_BODY_I]]: @@ -93,7 +93,7 @@ typedef unsigned long long uint64_t; // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label 
%[[WHILE_COND_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP10:![0-9]+]] // AMDGCNSPIRV: [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]: // AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[WHILE_BODY_I]] ], [ [[__R_0_I]], %[[WHILE_COND_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] @@ -105,7 +105,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // CHECK-LABEL: define dso_local i64 @test___make_mantissa_base10( // CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] // CHECK: [[WHILE_BODY_I]]: @@ -121,9 +121,9 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 // CHECK-NEXT: [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]] // CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1 -// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK: [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]: // CHECK-NEXT: 
[[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[WHILE_BODY_I]] ], [ [[SUB_I]], %[[IF_THEN_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] @@ -135,7 +135,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // AMDGCNSPIRV: [[WHILE_COND_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P]], %[[ENTRY]] ], [ [[__TAGP_ADDR_1_I:%.*]], %[[WHILE_BODY_I:.*]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[__R_1_I:%.*]], %[[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:.*]], label %[[WHILE_BODY_I]] // AMDGCNSPIRV: [[WHILE_BODY_I]]: @@ -148,7 +148,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP13:![0-9]+]] // AMDGCNSPIRV: [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]: // AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[WHILE_BODY_I]] ], [ [[__R_0_I]], %[[WHILE_COND_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] @@ -160,7 +160,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // CHECK-LABEL: define dso_local 
i64 @test___make_mantissa_base16( // CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] // CHECK: [[WHILE_BODY_I]]: @@ -185,9 +185,9 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // CHECK-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]] // CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1 -// CHECK-NEXT: [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK: [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]: // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[IF_ELSE17_I]] ], [ [[ADD28_I]], %[[IF_END31_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] @@ -195,7 +195,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base16( // AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA5]] 
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] // AMDGCNSPIRV: [[WHILE_BODY_I]]: @@ -220,9 +220,9 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // AMDGCNSPIRV-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I2]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] // AMDGCNSPIRV: [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]: // AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[IF_ELSE17_I]] ], [ [[ADD28_I]], %[[IF_END31_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] @@ -234,26 +234,26 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-LABEL: define dso_local i64 @test___make_mantissa( // CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 // CHECK-NEXT: br i1 
[[CMP_I]], label %[[IF_THEN_I:.*]], label %[[WHILE_COND_I14_I_PREHEADER:.*]] // CHECK: [[WHILE_COND_I14_I_PREHEADER]]: -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I17_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // CHECK-NEXT: br i1 [[CMP_NOT_I17_I5]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I:.*]] // CHECK: [[IF_THEN_I]]: // CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1 -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_PREHEADER:.*]] [ // CHECK-NEXT: i8 120, label %[[IF_THEN5_I:.*]] // CHECK-NEXT: i8 88, label %[[IF_THEN5_I]] // CHECK-NEXT: ] // CHECK: [[WHILE_COND_I_I_PREHEADER]]: -// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // CHECK-NEXT: br i1 [[CMP_NOT_I_I14]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I:.*]] // CHECK: [[IF_THEN5_I]]: -// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I30_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // CHECK-NEXT: br i1 [[CMP_NOT_I30_I9]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I:.*]] // CHECK: [[WHILE_BODY_I31_I]]: @@ -278,9 +278,9 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] // 
CHECK-NEXT: [[INCDEC_PTR_I34_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I10]], i64 1 -// CHECK-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I30_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I30_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I]], !llvm.loop [[LOOP11]] +// CHECK-NEXT: br i1 [[CMP_NOT_I30_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I]], !llvm.loop [[LOOP13]] // CHECK: [[WHILE_BODY_I_I]]: // CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_PREHEADER]] ] // CHECK-NEXT: [[__R_0_I_I16:%.*]] = phi i64 [ [[SUB_I_I:%.*]], %[[IF_THEN_I_I]] ], [ 0, %[[WHILE_COND_I_I_PREHEADER]] ] @@ -294,9 +294,9 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48 // CHECK-NEXT: [[SUB_I_I]] = add i64 [[ADD_I_I]], [[CONV5_I_I]] // CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I15]], i64 1 -// CHECK-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I]], !llvm.loop [[LOOP7]] +// CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I]], !llvm.loop [[LOOP9]] // CHECK: [[WHILE_BODY_I18_I]]: // CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_PREHEADER]] ] // CHECK-NEXT: [[__R_0_I16_I7:%.*]] = phi i64 [ [[SUB_I25_I:%.*]], %[[IF_THEN_I21_I]] ], [ 0, %[[WHILE_COND_I14_I_PREHEADER]] ] @@ 
-310,9 +310,9 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[ADD_I24_I:%.*]] = add i64 [[MUL_I22_I]], -48 // CHECK-NEXT: [[SUB_I25_I]] = add i64 [[ADD_I24_I]], [[CONV5_I23_I]] // CHECK-NEXT: [[INCDEC_PTR_I26_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I6]], i64 1 -// CHECK-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]], !llvm.loop [[LOOP10]] +// CHECK-NEXT: br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]], !llvm.loop [[LOOP12]] // CHECK: [[_ZL15__MAKE_MANTISSAPKC_EXIT]]: // CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I]] ], [ 0, %[[WHILE_COND_I14_I_PREHEADER]] ], [ [[SUB_I_I]], %[[IF_THEN_I_I]] ], [ 0, %[[WHILE_BODY_I_I]] ], [ [[ADD28_I_I]], %[[IF_END31_I_I]] ], [ 0, %[[IF_ELSE17_I_I]] ], [ [[SUB_I25_I]], %[[IF_THEN_I21_I]] ], [ 0, %[[WHILE_BODY_I18_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_0_I]] @@ -320,18 +320,18 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa( // AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 // AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label %[[IF_THEN_I:.*]], label %[[WHILE_COND_I14_I:.*]] // AMDGCNSPIRV: [[IF_THEN_I]]: // 
AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I:.*]] [ // AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I:.*]] // AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I]] // AMDGCNSPIRV-NEXT: ] // AMDGCNSPIRV: [[IF_THEN5_I]]: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I5:%.*]] = icmp eq i8 [[TMP2]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I5]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I:.*]] // AMDGCNSPIRV: [[WHILE_BODY_I32_I]]: @@ -356,13 +356,13 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // AMDGCNSPIRV-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I6]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP7]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I32_I]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I32_I]], !llvm.loop [[LOOP14]] // AMDGCNSPIRV: [[WHILE_COND_I_I]]: // 
AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], %[[WHILE_BODY_I_I:.*]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN_I]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], %[[WHILE_BODY_I_I]] ], [ 0, %[[IF_THEN_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I]] // AMDGCNSPIRV: [[WHILE_BODY_I_I]]: @@ -375,11 +375,11 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], i64 [[__TAGP_ADDR_1_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I_I]] = select i1 [[OR_COND_I_I]], i64 [[SUB_I_I]], i64 [[__R_0_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label %[[WHILE_COND_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label %[[WHILE_COND_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP10]] // AMDGCNSPIRV: [[WHILE_COND_I14_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], %[[WHILE_BODY_I18_I:.*]] ], [ [[P]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], %[[WHILE_BODY_I18_I]] ], [ 0, %[[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: 
[[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP10]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]] // AMDGCNSPIRV: [[WHILE_BODY_I18_I]]: @@ -392,7 +392,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], i64 [[__TAGP_ADDR_1_I25_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I26_I]] = select i1 [[OR_COND_I19_I]], i64 [[SUB_I23_I]], i64 [[__R_0_I16_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label %[[WHILE_COND_I14_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label %[[WHILE_COND_I14_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP13]] // AMDGCNSPIRV: [[_ZL15__MAKE_MANTISSAPKC_EXIT]]: // AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I]] ], [ 0, %[[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], %[[WHILE_COND_I_I]] ], [ [[ADD28_I_I]], %[[IF_END31_I_I]] ], [ 0, %[[IF_ELSE17_I_I]] ], [ 0, %[[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], %[[WHILE_COND_I14_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_0_I]] @@ -452,25 +452,25 @@ extern "C" __device__ long long test_llabs(long x) { // DEFAULT-LABEL: define dso_local noundef float @test_acosf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR12:[0-9]+]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_acosf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR4:[0-9]+]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14:[0-9]+]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR12:[0-9]+]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_acosf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR12:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_acosf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR12:[0-9]+]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_acosf( @@ -486,25 +486,25 @@ extern "C" __device__ float test_acosf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_acos( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: 
define dso_local noundef nofpclass(nan inf) double @test_acos( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_acos( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_acos( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_acos( @@ -520,25 +520,25 @@ extern "C" __device__ double test_acos(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_acoshf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_acosh_f32(float noundef [[X]]) #[[ATTR13:[0-9]+]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_acoshf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15:[0-9]+]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13:[0-9]+]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_acoshf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR13:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_acoshf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR13:[0-9]+]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_acoshf( @@ -554,25 +554,25 @@ extern "C" __device__ float test_acoshf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_acosh( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_acosh( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_acosh( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_acosh( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_acosh( @@ -588,25 +588,25 @@ extern "C" __device__ double test_acosh(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_asinf( // DEFAULT-SAME: float noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_asinf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_asinf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_asinf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_asinf( @@ -622,25 +622,25 @@ extern "C" __device__ float test_asinf(float x) { // DEFAULT-LABEL: define dso_local noundef double 
@test_asin( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_asin( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_asin( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_asin( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_asin( @@ -657,25 +657,25 @@ extern "C" __device__ 
double test_asin(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_asinhf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_asinhf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_asinhf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_asinhf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func 
noundef float @test_asinhf( @@ -691,25 +691,25 @@ extern "C" __device__ float test_asinhf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_asinh( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_asinh( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_asinh( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_asinh( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) 
#[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_asinh( @@ -725,25 +725,25 @@ extern "C" __device__ double test_asinh(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_atan2f( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atan2f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_atan2f( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float 
@test_atan2f( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atan2f( @@ -759,25 +759,25 @@ extern "C" __device__ float test_atan2f(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_atan2( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atan2( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_atan2( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_atan2( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atan2( @@ -793,25 +793,25 @@ extern "C" __device__ double test_atan2(double x, double y) { // DEFAULT-LABEL: define dso_local noundef float @test_atanf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atanf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float 
@__ocml_atan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_atanf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_atanf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atanf( @@ -827,25 +827,25 @@ extern "C" __device__ float test_atanf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_atan( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atan( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] 
= tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_atan( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_atan( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atan( @@ -861,25 +861,25 @@ extern "C" __device__ double test_atan(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_atanhf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atanhf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float 
noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_atanhf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_atanhf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atanhf( @@ -895,25 +895,25 @@ extern "C" __device__ float test_atanhf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_atanh( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atanh( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call 
nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_atanh( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_atanh( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atanh( @@ -929,25 +929,25 @@ extern "C" __device__ double test_atanh(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_cbrtf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cbrtf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { 
// FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_cbrtf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_cbrtf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cbrtf( @@ -963,25 +963,25 @@ extern "C" __device__ float test_cbrtf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_cbrt( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cbrt( // FINITEONLY-SAME: double noundef 
nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_cbrt( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_cbrt( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cbrt( @@ -1133,25 +1133,25 @@ extern "C" __device__ double test_copysign(double x, double y) { // DEFAULT-LABEL: define dso_local noundef float @test_cosf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // DEFAULT-NEXT: ret float [[CALL_I]] // // 
FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cosf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16:[0-9]+]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14:[0-9]+]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_cosf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]] +// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I1]] // // NCRDIV-LABEL: define dso_local noundef float @test_cosf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cosf( @@ -1167,25 +1167,25 @@ extern "C" __device__ float test_cosf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_cos( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = 
tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cos( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_cos( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_cos( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cos( @@ -1201,25 +1201,25 @@ extern "C" __device__ double test_cos(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_coshf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_coshf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_coshf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_coshf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_coshf( @@ -1235,25 +1235,25 @@ extern "C" __device__ float test_coshf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_cosh( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// 
DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cosh( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_cosh( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_cosh( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cosh( @@ -1269,25 +1269,25 @@ extern "C" __device__ double test_cosh(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_cospif( // DEFAULT-SAME: float 
noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cospif( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_cospif( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_cospif( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cospif( @@ -1303,25 +1303,25 @@ extern "C" __device__ float test_cospif(float x) { // 
DEFAULT-LABEL: define dso_local noundef double @test_cospi( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cospi( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_cospi( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_cospi( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef 
double @test_cospi( @@ -1337,25 +1337,25 @@ extern "C" __device__ double test_cospi(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_cyl_bessel_i0f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cyl_bessel_i0f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_cyl_bessel_i0f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_cyl_bessel_i0f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR14]] // 
NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cyl_bessel_i0f( @@ -1371,25 +1371,25 @@ extern "C" __device__ float test_cyl_bessel_i0f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_cyl_bessel_i0( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cyl_bessel_i0( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_cyl_bessel_i0( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_cyl_bessel_i0( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]] 
+// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cyl_bessel_i0( @@ -1405,25 +1405,25 @@ extern "C" __device__ double test_cyl_bessel_i0(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_cyl_bessel_i1f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cyl_bessel_i1f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_cyl_bessel_i1f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_cyl_bessel_i1f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cyl_bessel_i1f( @@ -1439,25 +1439,25 @@ extern "C" __device__ float test_cyl_bessel_i1f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_cyl_bessel_i1( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cyl_bessel_i1( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_cyl_bessel_i1( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_cyl_bessel_i1( // 
NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cyl_bessel_i1( @@ -1473,25 +1473,25 @@ extern "C" __device__ double test_cyl_bessel_i1(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_erfcf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_erfcf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_erfcf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float 
[[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_erfcf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_erfcf( @@ -1507,25 +1507,25 @@ extern "C" __device__ float test_erfcf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_erfc( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_erfc( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_erfc( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_erfc_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_erfc( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_erfc( @@ -1541,25 +1541,25 @@ extern "C" __device__ double test_erfc(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_erfinvf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_erfinvf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_erfinvf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef 
[[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_erfinvf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_erfinvf( @@ -1575,25 +1575,25 @@ extern "C" __device__ float test_erfinvf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_erfinv( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_erfinv( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_erfinv( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] 
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_erfinv( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_erfinv( @@ -1643,25 +1643,25 @@ extern "C" __device__ float test_exp10f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_exp10( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp10( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_exp10( // 
APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_exp10( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp10( @@ -1711,25 +1711,25 @@ extern "C" __device__ float test_exp2f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_exp2( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp2( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: 
ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_exp2( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_exp2( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp2( @@ -1779,25 +1779,25 @@ extern "C" __device__ float test_expf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_exp( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double 
@__ocml_exp_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_exp( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_exp( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp( @@ -1813,25 +1813,25 @@ extern "C" __device__ double test_exp(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_expm1f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_expm1f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: 
[[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_expm1f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_expm1f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_expm1f( @@ -1847,25 +1847,25 @@ extern "C" __device__ float test_expm1f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_expm1( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_expm1( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double 
@__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_expm1( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_expm1( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_expm1( @@ -1949,25 +1949,25 @@ extern "C" __device__ double test_fabs(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_fdimf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fdimf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef 
nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_fdimf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_fdimf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fdimf( @@ -1983,25 +1983,25 @@ extern "C" __device__ float test_fdimf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_fdim( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fdim( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_fdim( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_fdim( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret double 
[[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fdim( @@ -2035,7 +2035,7 @@ extern "C" __device__ double test_fdim(double x, double y) { // NCRDIV-LABEL: define dso_local noundef float @test_fdividef( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META12:![0-9]+]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META14:![0-9]+]] // NCRDIV-NEXT: ret float [[DIV_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fdividef( @@ -2357,25 +2357,25 @@ extern "C" __device__ double test_fmin(double x, double y) { // DEFAULT-LABEL: define dso_local noundef float @test_fmodf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmodf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define 
dso_local noundef float @test_fmodf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_fmodf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmodf( @@ -2391,25 +2391,25 @@ extern "C" __device__ float test_fmodf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_fmod( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmod( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef 
nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_fmod( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_fmod( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fmod( @@ -2427,7 +2427,7 @@ extern "C" __device__ double test_fmod(double x, double y) { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] +// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } 
[[TMP0]], 0 // DEFAULT-NEXT: ret float [[TMP2]] // @@ -2436,7 +2436,7 @@ extern "C" __device__ double test_fmod(double x, double y) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] +// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // FINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // FINITEONLY-NEXT: ret float [[TMP2]] // @@ -2445,7 +2445,7 @@ extern "C" __device__ double test_fmod(double x, double y) { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] +// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // APPROX-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // APPROX-NEXT: ret float [[TMP2]] // @@ -2454,7 +2454,7 @@ extern "C" __device__ double test_fmod(double x, double y) { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA13:![0-9]+]] +// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // NCRDIV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // NCRDIV-NEXT: ret float [[TMP2]] // @@ -2463,7 +2463,7 @@ extern "C" __device__ double test_fmod(double x, double y) { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // 
AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA13:![0-9]+]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] // AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // AMDGCNSPIRV-NEXT: ret float [[TMP2]] // @@ -2476,7 +2476,7 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] +// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4]] // DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // DEFAULT-NEXT: ret double [[TMP2]] // @@ -2485,7 +2485,7 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] +// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4]] // FINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // FINITEONLY-NEXT: ret double [[TMP2]] // @@ -2494,7 +2494,7 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] +// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4]] // APPROX-NEXT: [[TMP2:%.*]] = 
extractvalue { double, i32 } [[TMP0]], 0 // APPROX-NEXT: ret double [[TMP2]] // @@ -2503,7 +2503,7 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA13]] +// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA4]] // NCRDIV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // NCRDIV-NEXT: ret double [[TMP2]] // @@ -2512,7 +2512,7 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA13]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA5]] // AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // AMDGCNSPIRV-NEXT: ret double [[TMP2]] // @@ -2523,25 +2523,25 @@ extern "C" __device__ double test_frexp(double x, int* y) { // DEFAULT-LABEL: define dso_local noundef float @test_hypotf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_hypotf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef 
nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_hypotf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_hypotf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_hypotf( @@ -2557,25 +2557,25 @@ extern "C" __device__ float test_hypotf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_hypot( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_hypot( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_hypot( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_hypot( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: 
ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_hypot( @@ -2591,25 +2591,25 @@ extern "C" __device__ double test_hypot(double x, double y) { // DEFAULT-LABEL: define dso_local noundef i32 @test_ilogbf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret i32 [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef i32 @test_ilogbf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret i32 [[CALL_I]] // // APPROX-LABEL: define dso_local noundef i32 @test_ilogbf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret i32 [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef i32 @test_ilogbf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret i32 [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_ilogbf( @@ 
-2625,25 +2625,25 @@ extern "C" __device__ int test_ilogbf(float x) { // DEFAULT-LABEL: define dso_local noundef i32 @test_ilogb( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret i32 [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef i32 @test_ilogb( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret i32 [[CALL_I]] // // APPROX-LABEL: define dso_local noundef i32 @test_ilogb( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret i32 [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef i32 @test_ilogb( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret i32 [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_ilogb( @@ -2897,25 +2897,25 @@ extern "C" __device__ BOOL_TYPE test___isnan(double x) { // DEFAULT-LABEL: define 
dso_local noundef float @test_j0f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_j0f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_j0f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_j0f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_j0f( @@ -2931,25 +2931,25 @@ extern "C" __device__ float test_j0f(float 
x) { // DEFAULT-LABEL: define dso_local noundef double @test_j0( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_j0( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_j0( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_j0( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_j0( @@ -2965,25 
+2965,25 @@ extern "C" __device__ double test_j0(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_j1f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_j1f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_j1f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_j1f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef 
float @test_j1f( @@ -2999,25 +2999,25 @@ extern "C" __device__ float test_j1f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_j1( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_j1( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_j1( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_j1( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double 
[[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_j1( @@ -3038,14 +3038,14 @@ extern "C" __device__ double test_j1(double x) { // DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] // DEFAULT: [[IF_THEN_I]]: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] // DEFAULT: [[IF_THEN2_I]]: -// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL3JNFIF_EXIT]] // DEFAULT: [[IF_END4_I]]: -// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] // DEFAULT: [[FOR_BODY_I]]: @@ -3072,14 +3072,14 @@ extern "C" __device__ double test_j1(double x) { // FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] // FINITEONLY: [[IF_THEN_I]]: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float 
noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] // FINITEONLY: [[IF_THEN2_I]]: -// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL3JNFIF_EXIT]] // FINITEONLY: [[IF_END4_I]]: -// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] // FINITEONLY: [[FOR_BODY_I]]: @@ -3106,14 +3106,14 @@ extern "C" __device__ double test_j1(double x) { // APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] // APPROX: [[IF_THEN_I]]: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] // APPROX: [[IF_THEN2_I]]: -// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) 
#[[ATTR16]] +// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL3JNFIF_EXIT]] // APPROX: [[IF_END4_I]]: -// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] // APPROX: [[FOR_BODY_I]]: @@ -3140,14 +3140,14 @@ extern "C" __device__ double test_j1(double x) { // NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] // NCRDIV: [[IF_THEN_I]]: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] // NCRDIV: [[IF_THEN2_I]]: -// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL3JNFIF_EXIT]] // NCRDIV: [[IF_END4_I]]: -// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] +// 
NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] // NCRDIV: [[FOR_BODY_I]]: @@ -3156,7 +3156,7 @@ extern "C" __device__ double test_j1(double x) { // NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META12]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META14]] // NCRDIV-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] // NCRDIV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 @@ -3212,14 +3212,14 @@ extern "C" __device__ float test_jnf(int x, float y) { // DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] // DEFAULT: [[IF_THEN_I]]: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL2JNID_EXIT:.*]] // DEFAULT: [[IF_THEN2_I]]: -// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL2JNID_EXIT]] // DEFAULT: [[IF_END4_I]]: -// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double 
@__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] // DEFAULT: [[FOR_BODY_I]]: @@ -3246,14 +3246,14 @@ extern "C" __device__ float test_jnf(int x, float y) { // FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] // FINITEONLY: [[IF_THEN_I]]: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL2JNID_EXIT:.*]] // FINITEONLY: [[IF_THEN2_I]]: -// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL2JNID_EXIT]] // FINITEONLY: [[IF_END4_I]]: -// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) 
[[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] // FINITEONLY: [[FOR_BODY_I]]: @@ -3280,14 +3280,14 @@ extern "C" __device__ float test_jnf(int x, float y) { // APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] // APPROX: [[IF_THEN_I]]: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL2JNID_EXIT:.*]] // APPROX: [[IF_THEN2_I]]: -// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL2JNID_EXIT]] // APPROX: [[IF_END4_I]]: -// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] // APPROX: [[FOR_BODY_I]]: @@ -3314,14 +3314,14 @@ extern "C" __device__ float test_jnf(int x, float y) { // NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] // NCRDIV: [[IF_THEN_I]]: -// 
NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL2JNID_EXIT:.*]] // NCRDIV: [[IF_THEN2_I]]: -// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL2JNID_EXIT]] // NCRDIV: [[IF_END4_I]]: -// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] // NCRDIV: [[FOR_BODY_I]]: @@ -3449,25 +3449,25 @@ extern "C" __device__ double test_ldexp(double x, int y) { // DEFAULT-LABEL: define dso_local noundef float @test_lgammaf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_lgammaf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // 
FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_lgammaf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_lgammaf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_lgammaf( @@ -3483,25 +3483,25 @@ extern "C" __device__ float test_lgammaf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_lgamma( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_lgamma( // 
FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_lgamma( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_lgamma( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_lgamma( @@ -3707,25 +3707,25 @@ extern "C" __device__ float test_log10f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_log10( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret 
double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log10( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_log10( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_log10( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log10( @@ -3741,25 +3741,25 @@ extern "C" __device__ double test_log10(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_log1pf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log1pf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_log1pf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_log1pf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log1pf( @@ -3775,25 +3775,25 @@ extern "C" __device__ float test_log1pf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_log1p( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log1p( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_log1p( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_log1p( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log1p( @@ -3843,25 +3843,25 @@ extern "C" __device__ float test_log2f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_log2( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // 
DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log2( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_log2( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_log2( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log2( @@ -3877,25 +3877,25 @@ extern "C" __device__ double test_log2(double x) { // DEFAULT-LABEL: define dso_local noundef float 
@test_logbf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_logbf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_logbf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_logbf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_logbf( @@ -3911,25 +3911,25 @@ extern "C" __device__ float 
test_logbf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_logb( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_logb( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_logb( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_logb( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define 
spir_func noundef double @test_logb( @@ -4136,44 +4136,44 @@ extern "C" __device__ long int test_lround(double x) { // DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]] // DEFAULT-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_modff( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: call void 
@llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]] // FINITEONLY-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_modff( // APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]] // APPROX-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_modff( // 
NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17:![0-9]+]] // NCRDIV-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_modff( @@ -4196,44 +4196,44 @@ extern "C" __device__ float test_modff(float x, float* y) { // DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr 
addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]] // DEFAULT-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_modf( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]] // FINITEONLY-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_modf( // APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly 
captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]] // APPROX-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_modf( // NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19:![0-9]+]] 
// NCRDIV-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_modf( @@ -4255,26 +4255,26 @@ extern "C" __device__ double test_modf(double x, double* y) { // DEFAULT-LABEL: define dso_local float @test_nanf( // DEFAULT-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // DEFAULT-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] // DEFAULT: [[WHILE_COND_I14_I_I_PREHEADER]]: -// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] // DEFAULT: [[IF_THEN_I_I]]: // DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ // DEFAULT-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // DEFAULT-NEXT: i8 88, label %[[IF_THEN5_I_I]] // DEFAULT-NEXT: ] // DEFAULT: [[WHILE_COND_I_I_I_PREHEADER]]: -// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, 
!tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] // DEFAULT: [[IF_THEN5_I_I]]: -// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] // DEFAULT: [[WHILE_BODY_I31_I_I]]: @@ -4299,9 +4299,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // DEFAULT-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP13]] // DEFAULT: [[WHILE_BODY_I_I_I]]: // DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] @@ -4315,9 +4315,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add 
i64 [[MUL_I_I_I]], -48 // DEFAULT-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP9]] // DEFAULT: [[WHILE_BODY_I18_I_I]]: // DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] @@ -4331,9 +4331,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // DEFAULT-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // DEFAULT-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP12]] // DEFAULT: [[_ZL4NANFPKC_EXIT]]: // DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, 
%[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 @@ -4350,26 +4350,26 @@ extern "C" __device__ double test_modf(double x, double* y) { // APPROX-LABEL: define dso_local float @test_nanf( // APPROX-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // APPROX-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] // APPROX: [[WHILE_COND_I14_I_I_PREHEADER]]: -// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] // APPROX: [[IF_THEN_I_I]]: // APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ // APPROX-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // APPROX-NEXT: i8 88, label %[[IF_THEN5_I_I]] // APPROX-NEXT: ] // APPROX: [[WHILE_COND_I_I_I_PREHEADER]]: -// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: 
[[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] // APPROX: [[IF_THEN5_I_I]]: -// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] // APPROX: [[WHILE_BODY_I31_I_I]]: @@ -4394,9 +4394,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // APPROX-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP13]] // APPROX: [[WHILE_BODY_I_I_I]]: // APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] @@ -4410,9 +4410,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // APPROX-NEXT: [[SUB_I_I_I]] = add 
i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP9]] // APPROX: [[WHILE_BODY_I18_I_I]]: // APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] @@ -4426,9 +4426,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // APPROX-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // APPROX-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP12]] // APPROX: [[_ZL4NANFPKC_EXIT]]: // APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], 
%[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 @@ -4440,26 +4440,26 @@ extern "C" __device__ double test_modf(double x, double* y) { // NCRDIV-LABEL: define dso_local float @test_nanf( // NCRDIV-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // NCRDIV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] // NCRDIV: [[WHILE_COND_I14_I_I_PREHEADER]]: -// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] // NCRDIV: [[IF_THEN_I_I]]: // NCRDIV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ // NCRDIV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // NCRDIV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // NCRDIV-NEXT: ] // NCRDIV: [[WHILE_COND_I_I_I_PREHEADER]]: -// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // 
NCRDIV-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] // NCRDIV: [[IF_THEN5_I_I]]: -// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] // NCRDIV: [[WHILE_BODY_I31_I_I]]: @@ -4484,9 +4484,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // NCRDIV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // NCRDIV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP13]] // NCRDIV: [[WHILE_BODY_I_I_I]]: // NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] @@ -4500,9 +4500,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // NCRDIV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // NCRDIV-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I_I_I]] = 
getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP9]] // NCRDIV: [[WHILE_BODY_I18_I_I]]: // NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] @@ -4516,9 +4516,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // NCRDIV-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // NCRDIV-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP12]] // NCRDIV: [[_ZL4NANFPKC_EXIT]]: // NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], 
%[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 @@ -4530,18 +4530,18 @@ extern "C" __device__ double test_modf(double x, double* y) { // AMDGCNSPIRV-LABEL: define spir_func float @test_nanf( // AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I:.*]] // AMDGCNSPIRV: [[IF_THEN_I_I]]: // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I_I:.*]] [ // AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // AMDGCNSPIRV-NEXT: ] // AMDGCNSPIRV: [[IF_THEN5_I_I]]: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I_I:.*]] // AMDGCNSPIRV: [[WHILE_BODY_I32_I_I]]: @@ -4566,13 +4566,13 @@ extern "C" __device__ double test_modf(double x, 
double* y) { // AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP14]] // AMDGCNSPIRV: [[WHILE_COND_I_I_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I:.*]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN_I_I]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I]] ], [ 0, %[[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]] // AMDGCNSPIRV: [[WHILE_BODY_I_I_I]]: @@ -4585,11 +4585,11 @@ extern "C" __device__ double test_modf(double x, double* y) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] // AMDGCNSPIRV-NEXT: 
[[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]] // AMDGCNSPIRV: [[WHILE_COND_I14_I_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], %[[WHILE_BODY_I18_I_I:.*]] ], [ [[TAG]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], %[[WHILE_BODY_I18_I_I]] ], [ 0, %[[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]] // AMDGCNSPIRV: [[WHILE_BODY_I18_I_I]]: @@ -4602,7 +4602,7 @@ extern "C" __device__ double test_modf(double x, double* y) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP13]] // AMDGCNSPIRV: [[_ZL4NANFPKC_EXIT]]: // AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], 
%[[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], %[[WHILE_COND_I14_I_I]] ] // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 @@ -4618,26 +4618,26 @@ extern "C" __device__ float test_nanf(const char *tag) { // DEFAULT-LABEL: define dso_local double @test_nan( // DEFAULT-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // DEFAULT-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] // DEFAULT: [[WHILE_COND_I14_I_I_PREHEADER]]: -// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] // DEFAULT: [[IF_THEN_I_I]]: // DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ // DEFAULT-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // DEFAULT-NEXT: i8 88, label %[[IF_THEN5_I_I]] // DEFAULT-NEXT: ] // DEFAULT: [[WHILE_COND_I_I_I_PREHEADER]]: -// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // 
DEFAULT-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] // DEFAULT: [[IF_THEN5_I_I]]: -// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] // DEFAULT: [[WHILE_BODY_I31_I_I]]: @@ -4662,9 +4662,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // DEFAULT-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP13]] // DEFAULT: [[WHILE_BODY_I_I_I]]: // DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] @@ -4678,9 +4678,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // DEFAULT-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I_I_I]] = 
getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP9]] // DEFAULT: [[WHILE_BODY_I18_I_I]]: // DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] @@ -4694,9 +4694,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // DEFAULT-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // DEFAULT-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP12]] // DEFAULT: [[_ZL3NANPKC_EXIT]]: // DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], 
%[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 @@ -4712,26 +4712,26 @@ extern "C" __device__ float test_nanf(const char *tag) { // APPROX-LABEL: define dso_local double @test_nan( // APPROX-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // APPROX-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] // APPROX: [[WHILE_COND_I14_I_I_PREHEADER]]: -// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] // APPROX: [[IF_THEN_I_I]]: // APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ // APPROX-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // APPROX-NEXT: i8 88, label %[[IF_THEN5_I_I]] // APPROX-NEXT: ] // APPROX: [[WHILE_COND_I_I_I_PREHEADER]]: -// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // 
APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] // APPROX: [[IF_THEN5_I_I]]: -// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] // APPROX: [[WHILE_BODY_I31_I_I]]: @@ -4756,9 +4756,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // APPROX-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP13]] // APPROX: [[WHILE_BODY_I_I_I]]: // APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] @@ -4772,9 +4772,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // APPROX-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// APPROX-NEXT: 
[[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP9]] // APPROX: [[WHILE_BODY_I18_I_I]]: // APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] @@ -4788,9 +4788,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // APPROX-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // APPROX-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP12]] // APPROX: [[_ZL3NANPKC_EXIT]]: // APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, 
%[[WHILE_BODY_I18_I_I]] ] // APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 @@ -4801,26 +4801,26 @@ extern "C" __device__ float test_nanf(const char *tag) { // NCRDIV-LABEL: define dso_local double @test_nan( // NCRDIV-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // NCRDIV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] // NCRDIV: [[WHILE_COND_I14_I_I_PREHEADER]]: -// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 // NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] // NCRDIV: [[IF_THEN_I_I]]: // NCRDIV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ // NCRDIV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // NCRDIV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // NCRDIV-NEXT: ] // NCRDIV: [[WHILE_COND_I_I_I_PREHEADER]]: -// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 // NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] // NCRDIV: 
[[IF_THEN5_I_I]]: -// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 // NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] // NCRDIV: [[WHILE_BODY_I31_I_I]]: @@ -4845,9 +4845,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // NCRDIV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // NCRDIV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP13]] // NCRDIV: [[WHILE_BODY_I_I_I]]: // NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] @@ -4861,9 +4861,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // NCRDIV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // NCRDIV-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP12]] = load 
i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP9]] // NCRDIV: [[WHILE_BODY_I18_I_I]]: // NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] @@ -4877,9 +4877,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // NCRDIV-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // NCRDIV-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA8]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP12]] // NCRDIV: [[_ZL3NANPKC_EXIT]]: // NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // NCRDIV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 @@ 
-4890,18 +4890,18 @@ extern "C" __device__ float test_nanf(const char *tag) { // AMDGCNSPIRV-LABEL: define spir_func double @test_nan( // AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I:.*]] // AMDGCNSPIRV: [[IF_THEN_I_I]]: // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I_I:.*]] [ // AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] // AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // AMDGCNSPIRV-NEXT: ] // AMDGCNSPIRV: [[IF_THEN5_I_I]]: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I_I:.*]] // AMDGCNSPIRV: [[WHILE_BODY_I32_I_I]]: @@ -4926,13 +4926,13 @@ extern "C" __device__ float test_nanf(const char *tag) { // AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // AMDGCNSPIRV-NEXT: 
[[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP14]] // AMDGCNSPIRV: [[WHILE_COND_I_I_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I:.*]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN_I_I]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I]] ], [ 0, %[[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]] // AMDGCNSPIRV: [[WHILE_BODY_I_I_I]]: @@ -4945,11 +4945,11 @@ extern "C" __device__ float test_nanf(const char *tag) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]] +// 
AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]] // AMDGCNSPIRV: [[WHILE_COND_I14_I_I]]: // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], %[[WHILE_BODY_I18_I_I:.*]] ], [ [[TAG]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], %[[WHILE_BODY_I18_I_I]] ], [ 0, %[[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA9]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0 // AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]] // AMDGCNSPIRV: [[WHILE_BODY_I18_I_I]]: @@ -4962,7 +4962,7 @@ extern "C" __device__ float test_nanf(const char *tag) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP13]] // AMDGCNSPIRV: [[_ZL3NANPKC_EXIT]]: // AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], %[[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], %[[WHILE_COND_I14_I_I]] ] // AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i64 
[[RETVAL_0_I_I]], 2251799813685247 @@ -5161,25 +5161,25 @@ extern "C" __device__ double test_nearbyint(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_nextafterf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_nextafterf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_nextafterf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_nextafterf( // NCRDIV-SAME: float noundef 
[[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nextafterf( @@ -5195,25 +5195,25 @@ extern "C" __device__ float test_nextafterf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_nextafter( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_nextafter( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_nextafter( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_nextafter( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nextafter( @@ -5229,25 +5229,25 @@ extern "C" __device__ double test_nextafter(double x, double y) { // DEFAULT-LABEL: define dso_local noundef float @test_norm3df( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_norm3df( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: 
[[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_norm3df( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_norm3df( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_norm3df( @@ -5263,25 +5263,25 @@ extern "C" __device__ float test_norm3df(float x, float y, float z) { // DEFAULT-LABEL: define dso_local noundef double @test_norm3d( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) 
local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_norm3d( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_norm3d( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_norm3d( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef 
[[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_norm3d( @@ -5297,25 +5297,25 @@ extern "C" __device__ double test_norm3d(double x, double y, double z) { // DEFAULT-LABEL: define dso_local noundef float @test_norm4df( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_norm4df( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef 
nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_norm4df( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_norm4df( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_norm4df( @@ -5331,25 +5331,25 @@ extern "C" __device__ float test_norm4df(float x, float y, float z, float w) { // DEFAULT-LABEL: define dso_local noundef double @test_norm4d( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail 
call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_norm4d( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_norm4d( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // APPROX-NEXT: ret 
double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_norm4d( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_norm4d( @@ -5365,25 +5365,25 @@ extern "C" __device__ double test_norm4d(double x, double y, double z, double w) // DEFAULT-LABEL: define dso_local noundef float @test_normcdff( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_normcdff( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_normcdff( // APPROX-SAME: float noundef 
[[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_normcdff( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_normcdff( @@ -5399,25 +5399,25 @@ extern "C" __device__ float test_normcdff(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_normcdf( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_normcdf( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // 
APPROX-LABEL: define dso_local noundef double @test_normcdf( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_normcdf( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_normcdf( @@ -5433,25 +5433,25 @@ extern "C" __device__ double test_normcdf(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_normcdfinvf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_normcdfinvf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float 
@__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_normcdfinvf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_normcdfinvf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_normcdfinvf( @@ -5467,25 +5467,25 @@ extern "C" __device__ float test_normcdfinvf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_normcdfinv( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_normcdfinv( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef 
nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_normcdfinv( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_normcdfinv( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_normcdfinv( @@ -5739,25 +5739,25 @@ extern "C" __device__ double test_norm(int x, const double *y) { // DEFAULT-LABEL: define dso_local noundef float @test_powf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_powf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef 
nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_powf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_powf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_powf( @@ -5773,25 +5773,25 @@ extern "C" __device__ float test_powf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_pow( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef 
[[X]], double noundef [[Y]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_pow( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_pow( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_pow( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: 
define spir_func noundef double @test_pow( @@ -5807,25 +5807,25 @@ extern "C" __device__ double test_pow(double x, double y) { // DEFAULT-LABEL: define dso_local noundef float @test_powif( // DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_powif( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_powif( // APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_powif( // NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_powif( @@ -5841,25 +5841,25 @@ extern "C" __device__ float test_powif(float x, int y) { // DEFAULT-LABEL: define dso_local noundef double @test_powi( // DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_powi( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_powi( // APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract 
noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_powi( // NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_powi( @@ -5875,25 +5875,25 @@ extern "C" __device__ double test_powi(double x, int y) { // DEFAULT-LABEL: define dso_local noundef float @test_rcbrtf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rcbrtf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_rcbrtf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_rcbrtf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rcbrtf( @@ -5909,25 +5909,25 @@ extern "C" __device__ float test_rcbrtf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_rcbrt( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rcbrt( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_rcbrt( // APPROX-SAME: double 
noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_rcbrt( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rcbrt( @@ -5943,25 +5943,25 @@ extern "C" __device__ double test_rcbrt(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_remainderf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_remainderf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail 
call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_remainderf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_remainderf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_remainderf( @@ -5977,25 +5977,25 @@ extern "C" __device__ float test_remainderf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_remainder( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef 
nofpclass(nan inf) double @test_remainder( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_remainder( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_remainder( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_remainder( @@ -6012,44 +6012,44 @@ extern "C" __device__ double test_remainder(double x, double y) { // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef 
writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] -// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_remquof( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) 
[[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] -// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_remquof( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] -// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = 
call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_remquof( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA13]] -// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA13]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_remquof( @@ -6059,8 +6059,8 @@ extern "C" __device__ 
double test_remainder(double x, double y) { // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA13]] -// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA13]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA5]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // @@ -6072,44 +6072,44 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) { // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] -// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) 
#[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_remquo( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] -// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// 
FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_remquo( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] -// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_remquo( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) -// 
NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA13]] -// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA13]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA4]] +// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA4]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_remquo( @@ -6119,8 +6119,8 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) { // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA13]] -// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA13]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA5]] +// 
AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA5]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // @@ -6131,25 +6131,25 @@ extern "C" __device__ double test_remquo(double x, double y, int* z) { // DEFAULT-LABEL: define dso_local noundef float @test_rhypotf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rhypotf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_rhypotf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_rhypotf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rhypotf( @@ -6165,25 +6165,25 @@ extern "C" __device__ float test_rhypotf(float x, float y) { // DEFAULT-LABEL: define dso_local noundef double @test_rhypot( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rhypot( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR12]] // 
FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_rhypot( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_rhypot( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rhypot( @@ -6282,7 +6282,7 @@ extern "C" __device__ double test_rint(double x) { // DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] // DEFAULT: [[_ZL6RNORMFIPKF_EXIT]]: // DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnormf( @@ -6303,7 +6303,7 @@ extern "C" __device__ double test_rint(double x) { // FINITEONLY-NEXT: br i1 
[[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] // FINITEONLY: [[_ZL6RNORMFIPKF_EXIT]]: // FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_rnormf( @@ -6324,7 +6324,7 @@ extern "C" __device__ double test_rint(double x) { // APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] // APPROX: [[_ZL6RNORMFIPKF_EXIT]]: // APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_rnormf( @@ -6345,7 +6345,7 @@ extern "C" __device__ double test_rint(double x) { // NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // NCRDIV: [[_ZL6RNORMFIPKF_EXIT]]: // NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnormf( @@ -6391,7 +6391,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // DEFAULT: [[_ZL5RNORMIPKD_EXIT]]: // DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm( @@ -6412,7 +6412,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // FINITEONLY: [[_ZL5RNORMIPKD_EXIT]]: // FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_rnorm( @@ -6433,7 +6433,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // 
APPROX: [[_ZL5RNORMIPKD_EXIT]]: // APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_rnorm( @@ -6454,7 +6454,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] // NCRDIV: [[_ZL5RNORMIPKD_EXIT]]: // NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm( @@ -6485,25 +6485,25 @@ extern "C" __device__ double test_rnorm(int x, const double* y) { // DEFAULT-LABEL: define dso_local noundef float @test_rnorm3df( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float 
@test_rnorm3df( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_rnorm3df( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_rnorm3df( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float 
@test_rnorm3df( @@ -6519,25 +6519,25 @@ extern "C" __device__ float test_rnorm3df(float x, float y, float z) { // DEFAULT-LABEL: define dso_local noundef double @test_rnorm3d( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm3d( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_rnorm3d( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_rnorm3d( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm3d( @@ -6553,25 +6553,25 @@ extern "C" __device__ double test_rnorm3d(double x, double y, double z) { // DEFAULT-LABEL: define dso_local noundef float @test_rnorm4df( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnorm4df( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef 
nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_rnorm4df( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_rnorm4df( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnorm4df( @@ -6587,25 +6587,25 @@ extern "C" __device__ float test_rnorm4df(float x, float y, float z, 
float w) { // DEFAULT-LABEL: define dso_local noundef double @test_rnorm4d( // DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm4d( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR12]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_rnorm4d( // APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_rnorm4d( // NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm4d( @@ -6689,25 +6689,25 @@ extern "C" __device__ double test_round(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_rsqrtf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rsqrtf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// 
FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_rsqrtf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_rsqrtf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rsqrtf( @@ -6723,25 +6723,25 @@ extern "C" __device__ float test_rsqrtf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_rsqrt( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rsqrt( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) 
double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_rsqrt( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_rsqrt( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rsqrt( @@ -6997,48 +6997,48 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) { // DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// 
DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret void // // FINITEONLY-LABEL: define dso_local void @test_sincosf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// 
FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret void // // APPROX-LABEL: define dso_local void @test_sincosf( // APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // // NCRDIV-LABEL: define dso_local void @test_sincosf( // NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr 
addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret void // // AMDGCNSPIRV-LABEL: define spir_func void @test_sincosf( @@ -7062,48 +7062,48 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) { // DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] -// 
DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret void // // FINITEONLY-LABEL: define dso_local void @test_sincos( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret void // // APPROX-LABEL: define dso_local void @test_sincos( // APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] 
// APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // APPROX-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // // NCRDIV-LABEL: define dso_local void @test_sincos( // NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: [[TMP0:%.*]] = load 
double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret void // // AMDGCNSPIRV-LABEL: define spir_func void @test_sincos( @@ -7127,48 +7127,48 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) { // DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret void // // FINITEONLY-LABEL: define dso_local void @test_sincospif( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr 
noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret void // // APPROX-LABEL: define dso_local void @test_sincospif( // APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// 
APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // // NCRDIV-LABEL: define dso_local void @test_sincospif( // NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret void // // AMDGCNSPIRV-LABEL: define spir_func void 
@test_sincospif( @@ -7192,48 +7192,48 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) { // DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] -// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // DEFAULT-NEXT: ret void // // FINITEONLY-LABEL: define dso_local void @test_sincospi( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double 
@__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] -// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // FINITEONLY-NEXT: ret void // // APPROX-LABEL: define dso_local void @test_sincospi( // APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // APPROX-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: store double [[TMP0]], 
ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] -// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // // NCRDIV-LABEL: define dso_local void @test_sincospi( // NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) -// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]] // NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]] -// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] +// NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // NCRDIV-NEXT: ret void // // AMDGCNSPIRV-LABEL: define spir_func void @test_sincospi( @@ -7256,25 +7256,25 @@ extern "C" __device__ void test_sincospi(double x, double *y, double *z) { // DEFAULT-LABEL: define dso_local noundef float @test_sinf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_sin_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sinf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_sinf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I1]] // // NCRDIV-LABEL: define dso_local noundef float @test_sinf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sinf( @@ -7290,25 +7290,25 @@ extern "C" __device__ float test_sinf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_sin( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// 
DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sin( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_sin( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_sin( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sin( @@ -7324,25 +7324,25 @@ extern "C" __device__ double test_sin(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_sinpif( // DEFAULT-SAME: float noundef 
[[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sinpif( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_sinpif( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_sinpif( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sinpif( @@ -7358,25 +7358,25 @@ extern "C" __device__ float test_sinpif(float x) { // DEFAULT-LABEL: 
define dso_local noundef double @test_sinpi( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sinpi( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_sinpi( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_sinpi( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double 
@test_sinpi( @@ -7460,25 +7460,25 @@ extern "C" __device__ double test_sqrt(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_tanf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tanf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_tanf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_tanf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // 
AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tanf( @@ -7494,25 +7494,25 @@ extern "C" __device__ float test_tanf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_tan( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tan( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_tan( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_tan( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double 
noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tan( @@ -7528,25 +7528,25 @@ extern "C" __device__ double test_tan(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_tanhf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tanhf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_tanhf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_tanhf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = 
tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tanhf( @@ -7562,25 +7562,25 @@ extern "C" __device__ float test_tanhf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_tanh( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR13]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tanh( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR13]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_tanh( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_tanh( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR13]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tanh( @@ -7596,25 +7596,25 @@ extern "C" __device__ double test_tanh(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_tgammaf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tgammaf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_tgammaf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_tgammaf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: 
[[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tgammaf( @@ -7630,25 +7630,25 @@ extern "C" __device__ float test_tgammaf(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_tgamma( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tgamma( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_tgamma( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double 
@test_tgamma( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tgamma( @@ -7732,25 +7732,25 @@ extern "C" __device__ double test_trunc(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_y0f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_y0f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_y0f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float 
[[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_y0f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_y0f( @@ -7766,25 +7766,25 @@ extern "C" __device__ float test_y0f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_y0( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_y0( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_y0( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef 
[[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_y0( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_y0( @@ -7800,25 +7800,25 @@ extern "C" __device__ double test_y0(double x) { // DEFAULT-LABEL: define dso_local noundef float @test_y1f( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_y1f( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test_y1f( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract 
noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test_y1f( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test_y1f( @@ -7834,25 +7834,25 @@ extern "C" __device__ float test_y1f(float x) { // DEFAULT-LABEL: define dso_local noundef double @test_y1( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_y1( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // // APPROX-LABEL: define dso_local noundef double @test_y1( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]] +// 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef double @test_y1( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef double @test_y1( @@ -7873,14 +7873,14 @@ extern "C" __device__ double test_y1(double x) { // DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] // DEFAULT: [[IF_THEN_I]]: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] // DEFAULT: [[IF_THEN2_I]]: -// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL3YNFIF_EXIT]] // DEFAULT: [[IF_END4_I]]: -// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // 
DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] // DEFAULT: [[FOR_BODY_I]]: @@ -7907,14 +7907,14 @@ extern "C" __device__ double test_y1(double x) { // FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] // FINITEONLY: [[IF_THEN_I]]: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] // FINITEONLY: [[IF_THEN2_I]]: -// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL3YNFIF_EXIT]] // FINITEONLY: [[IF_END4_I]]: -// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] // FINITEONLY: [[FOR_BODY_I]]: 
@@ -7941,14 +7941,14 @@ extern "C" __device__ double test_y1(double x) { // APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] // APPROX: [[IF_THEN_I]]: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] // APPROX: [[IF_THEN2_I]]: -// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL3YNFIF_EXIT]] // APPROX: [[IF_END4_I]]: -// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] // APPROX: [[FOR_BODY_I]]: @@ -7975,14 +7975,14 @@ extern "C" __device__ double test_y1(double x) { // NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] // NCRDIV: [[IF_THEN_I]]: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] // NCRDIV: [[IF_THEN2_I]]: -// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float 
noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL3YNFIF_EXIT]] // NCRDIV: [[IF_END4_I]]: -// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] // NCRDIV: [[FOR_BODY_I]]: @@ -7991,7 +7991,7 @@ extern "C" __device__ double test_y1(double x) { // NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META12]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META14]] // NCRDIV-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] // NCRDIV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 @@ -8047,14 +8047,14 @@ extern "C" __device__ float test_ynf(int x, float y) { // DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] // DEFAULT: [[IF_THEN_I]]: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] // 
DEFAULT-NEXT: br label %[[_ZL2YNID_EXIT:.*]] // DEFAULT: [[IF_THEN2_I]]: -// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: br label %[[_ZL2YNID_EXIT]] // DEFAULT: [[IF_END4_I]]: -// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] // DEFAULT: [[FOR_BODY_I]]: @@ -8081,14 +8081,14 @@ extern "C" __device__ float test_ynf(int x, float y) { // FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] // FINITEONLY: [[IF_THEN_I]]: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL2YNID_EXIT:.*]] // FINITEONLY: [[IF_THEN2_I]]: -// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) 
#[[ATTR14]] // FINITEONLY-NEXT: br label %[[_ZL2YNID_EXIT]] // FINITEONLY: [[IF_END4_I]]: -// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] // FINITEONLY: [[FOR_BODY_I]]: @@ -8115,14 +8115,14 @@ extern "C" __device__ float test_ynf(int x, float y) { // APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] // APPROX: [[IF_THEN_I]]: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL2YNID_EXIT:.*]] // APPROX: [[IF_THEN2_I]]: -// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: br label %[[_ZL2YNID_EXIT]] // APPROX: [[IF_END4_I]]: -// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] 
+// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] // APPROX: [[FOR_BODY_I]]: @@ -8149,14 +8149,14 @@ extern "C" __device__ float test_ynf(int x, float y) { // NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] // NCRDIV: [[IF_THEN_I]]: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL2YNID_EXIT:.*]] // NCRDIV: [[IF_THEN2_I]]: -// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: br label %[[_ZL2YNID_EXIT]] // NCRDIV: [[IF_END4_I]]: -// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 // NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] // NCRDIV: [[FOR_BODY_I]]: @@ -8216,25 +8216,25 @@ extern "C" __device__ double test_yn(int x, double y) { // DEFAULT-LABEL: define dso_local noundef float @test___cosf( 
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___cosf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test___cosf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test___cosf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test___cosf( @@ -8380,7 +8380,7 @@ 
extern "C" __device__ float test___fadd_rn(float x, float y) { // NCRDIV-LABEL: define dso_local noundef float @test___fdividef( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META12]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META14]] // NCRDIV-NEXT: ret float [[DIV_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fdividef( @@ -8482,7 +8482,7 @@ extern "C" __device__ float test___fmul_rn(float x, float y) { // NCRDIV-LABEL: define dso_local noundef float @test___frcp_rn( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]], !fpmath [[META12]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]], !fpmath [[META14]] // NCRDIV-NEXT: ret float [[DIV_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test___frcp_rn( @@ -8532,25 +8532,25 @@ extern "C" __device__ float test___frsqrt_rn(float x) { // DEFAULT-LABEL: define dso_local noundef float @test___fsqrt_rn( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR12]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___fsqrt_rn( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef 
nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR12]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test___fsqrt_rn( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test___fsqrt_rn( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR12]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fsqrt_rn( @@ -8702,25 +8702,25 @@ extern "C" __device__ float test___logf(float x) { // DEFAULT-LABEL: define dso_local noundef float @test___powf( // DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___powf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef 
nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR13]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test___powf( // APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test___powf( // NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test___powf( @@ -8785,36 +8785,36 @@ extern "C" __device__ float test___saturatef(float x) { // DEFAULT-LABEL: define dso_local void @test___sincosf( // DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: 
[[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] -// DEFAULT-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: ret void // // FINITEONLY-LABEL: define dso_local void @test___sincosf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] -// FINITEONLY-NEXT: [[CALL1_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL1_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: ret void // // APPROX-LABEL: define dso_local void @test___sincosf( // APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly 
captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] -// APPROX-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: ret void // // NCRDIV-LABEL: define dso_local void @test___sincosf( // NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] -// NCRDIV-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: ret void // @@ -8834,25 +8834,25 @@ extern "C" __device__ void test___sincosf(float x, float *y, float *z) { // DEFAULT-LABEL: define dso_local noundef float @test___sinf( // DEFAULT-SAME: float noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // // FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___sinf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // // APPROX-LABEL: define dso_local noundef float @test___sinf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // // NCRDIV-LABEL: define dso_local noundef float @test___sinf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // // AMDGCNSPIRV-LABEL: define spir_func noundef float @test___sinf( @@ -8868,8 +8868,8 @@ extern "C" __device__ float test___sinf(float 
x) { // DEFAULT-LABEL: define dso_local float @test___tanf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // DEFAULT-NEXT: [[ENTRY:.*:]] -// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] -// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] +// DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // DEFAULT-NEXT: ret float [[MUL_I]] @@ -8877,8 +8877,8 @@ extern "C" __device__ float test___sinf(float x) { // FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___tanf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] -// FINITEONLY-NEXT: [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] +// FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // 
FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[CALL_I3_I]], [[TMP0]] // FINITEONLY-NEXT: ret float [[MUL_I]] @@ -8886,8 +8886,8 @@ extern "C" __device__ float test___sinf(float x) { // APPROX-LABEL: define dso_local float @test___tanf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // APPROX-NEXT: [[ENTRY:.*:]] -// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] -// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] +// APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // APPROX-NEXT: ret float [[MUL_I]] @@ -8895,8 +8895,8 @@ extern "C" __device__ float test___sinf(float x) { // NCRDIV-LABEL: define dso_local float @test___tanf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { // NCRDIV-NEXT: [[ENTRY:.*:]] -// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] -// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] +// NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // NCRDIV-NEXT: ret float 
[[MUL_I]] @@ -9285,120 +9285,120 @@ extern "C" __device__ int test_int_max(int x, int y) { return max(x, y); } //. -// DEFAULT: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// DEFAULT: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// DEFAULT: [[META6]] = !{!"Simple C++ TBAA"} -// DEFAULT: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} -// DEFAULT: [[META8]] = !{!"llvm.loop.mustprogress"} -// DEFAULT: [[META9]] = !{!"llvm.loop.unroll.disable"} -// DEFAULT: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} -// DEFAULT: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} -// DEFAULT: [[META13]] = !{!"int", [[META5]], i64 0} -// DEFAULT: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// DEFAULT: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// DEFAULT: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// DEFAULT: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// DEFAULT: [[META7]] = !{!"Simple C++ TBAA"} +// DEFAULT: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} +// DEFAULT: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]} +// DEFAULT: [[META10]] = !{!"llvm.loop.mustprogress"} +// DEFAULT: [[META11]] = !{!"llvm.loop.unroll.disable"} +// DEFAULT: [[LOOP12]] = distinct !{[[LOOP12]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP13]] = distinct !{[[LOOP13]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP14]] = distinct !{[[LOOP14]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP15]] = distinct !{[[LOOP15]], [[META10]], [[META11]]} // DEFAULT: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} -// DEFAULT: [[META17]] = !{!"float", [[META5]], i64 0} +// DEFAULT: [[META17]] = !{!"float", [[META6]], i64 0} // DEFAULT: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -// DEFAULT: [[META19]] = 
!{!"double", [[META5]], i64 0} -// DEFAULT: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} -// DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]} +// DEFAULT: [[META19]] = !{!"double", [[META6]], i64 0} +// DEFAULT: [[LOOP20]] = distinct !{[[LOOP20]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP21]] = distinct !{[[LOOP21]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP22]] = distinct !{[[LOOP22]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP23]] = distinct !{[[LOOP23]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP24]] = distinct !{[[LOOP24]], [[META10]], [[META11]]} +// DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META10]], [[META11]]} //. -// FINITEONLY: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// FINITEONLY: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// FINITEONLY: [[META6]] = !{!"Simple C++ TBAA"} -// FINITEONLY: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} -// FINITEONLY: [[META8]] = !{!"llvm.loop.mustprogress"} -// FINITEONLY: [[META9]] = !{!"llvm.loop.unroll.disable"} -// FINITEONLY: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} -// FINITEONLY: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} -// FINITEONLY: [[META13]] = !{!"int", [[META5]], i64 0} -// FINITEONLY: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// FINITEONLY: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// FINITEONLY: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// FINITEONLY: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// 
FINITEONLY: [[META7]] = !{!"Simple C++ TBAA"} +// FINITEONLY: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} +// FINITEONLY: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]} +// FINITEONLY: [[META10]] = !{!"llvm.loop.mustprogress"} +// FINITEONLY: [[META11]] = !{!"llvm.loop.unroll.disable"} +// FINITEONLY: [[LOOP12]] = distinct !{[[LOOP12]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP13]] = distinct !{[[LOOP13]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP14]] = distinct !{[[LOOP14]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP15]] = distinct !{[[LOOP15]], [[META10]], [[META11]]} // FINITEONLY: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} -// FINITEONLY: [[META17]] = !{!"float", [[META5]], i64 0} +// FINITEONLY: [[META17]] = !{!"float", [[META6]], i64 0} // FINITEONLY: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -// FINITEONLY: [[META19]] = !{!"double", [[META5]], i64 0} -// FINITEONLY: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} -// FINITEONLY: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]} +// FINITEONLY: [[META19]] = !{!"double", [[META6]], i64 0} +// FINITEONLY: [[LOOP20]] = distinct !{[[LOOP20]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP21]] = distinct !{[[LOOP21]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP22]] = distinct !{[[LOOP22]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP23]] = distinct !{[[LOOP23]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP24]] = distinct !{[[LOOP24]], [[META10]], [[META11]]} +// FINITEONLY: [[LOOP25]] = distinct !{[[LOOP25]], [[META10]], [[META11]]} //. 
-// APPROX: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// APPROX: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// APPROX: [[META6]] = !{!"Simple C++ TBAA"} -// APPROX: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} -// APPROX: [[META8]] = !{!"llvm.loop.mustprogress"} -// APPROX: [[META9]] = !{!"llvm.loop.unroll.disable"} -// APPROX: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} -// APPROX: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} -// APPROX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} -// APPROX: [[META13]] = !{!"int", [[META5]], i64 0} -// APPROX: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]} -// APPROX: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// APPROX: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// APPROX: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// APPROX: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// APPROX: [[META7]] = !{!"Simple C++ TBAA"} +// APPROX: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} +// APPROX: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]} +// APPROX: [[META10]] = !{!"llvm.loop.mustprogress"} +// APPROX: [[META11]] = !{!"llvm.loop.unroll.disable"} +// APPROX: [[LOOP12]] = distinct !{[[LOOP12]], [[META10]], [[META11]]} +// APPROX: [[LOOP13]] = distinct !{[[LOOP13]], [[META10]], [[META11]]} +// APPROX: [[LOOP14]] = distinct !{[[LOOP14]], [[META10]], [[META11]]} +// APPROX: [[LOOP15]] = distinct !{[[LOOP15]], [[META10]], [[META11]]} // APPROX: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} -// APPROX: [[META17]] = !{!"float", [[META5]], i64 0} +// APPROX: [[META17]] = !{!"float", [[META6]], i64 0} // APPROX: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -// APPROX: [[META19]] = !{!"double", [[META5]], i64 0} -// APPROX: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} -// APPROX: [[LOOP21]] = distinct !{[[LOOP21]], 
[[META8]], [[META9]]} -// APPROX: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} -// APPROX: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} -// APPROX: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} -// APPROX: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]} +// APPROX: [[META19]] = !{!"double", [[META6]], i64 0} +// APPROX: [[LOOP20]] = distinct !{[[LOOP20]], [[META10]], [[META11]]} +// APPROX: [[LOOP21]] = distinct !{[[LOOP21]], [[META10]], [[META11]]} +// APPROX: [[LOOP22]] = distinct !{[[LOOP22]], [[META10]], [[META11]]} +// APPROX: [[LOOP23]] = distinct !{[[LOOP23]], [[META10]], [[META11]]} +// APPROX: [[LOOP24]] = distinct !{[[LOOP24]], [[META10]], [[META11]]} +// APPROX: [[LOOP25]] = distinct !{[[LOOP25]], [[META10]], [[META11]]} //. -// NCRDIV: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -// NCRDIV: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} -// NCRDIV: [[META6]] = !{!"Simple C++ TBAA"} -// NCRDIV: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} -// NCRDIV: [[META8]] = !{!"llvm.loop.mustprogress"} -// NCRDIV: [[META9]] = !{!"llvm.loop.unroll.disable"} -// NCRDIV: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} -// NCRDIV: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} -// NCRDIV: [[META12]] = !{float 2.500000e+00} -// NCRDIV: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} -// NCRDIV: [[META14]] = !{!"int", [[META5]], i64 0} -// NCRDIV: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} -// NCRDIV: [[LOOP16]] = distinct !{[[LOOP16]], [[META8]], [[META9]]} +// NCRDIV: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// NCRDIV: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// NCRDIV: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// NCRDIV: [[META7]] = !{!"Simple C++ TBAA"} +// NCRDIV: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0} +// NCRDIV: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], 
[[META11:![0-9]+]]} +// NCRDIV: [[META10]] = !{!"llvm.loop.mustprogress"} +// NCRDIV: [[META11]] = !{!"llvm.loop.unroll.disable"} +// NCRDIV: [[LOOP12]] = distinct !{[[LOOP12]], [[META10]], [[META11]]} +// NCRDIV: [[LOOP13]] = distinct !{[[LOOP13]], [[META10]], [[META11]]} +// NCRDIV: [[META14]] = !{float 2.500000e+00} +// NCRDIV: [[LOOP15]] = distinct !{[[LOOP15]], [[META10]], [[META11]]} +// NCRDIV: [[LOOP16]] = distinct !{[[LOOP16]], [[META10]], [[META11]]} // NCRDIV: [[FLOAT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// NCRDIV: [[META18]] = !{!"float", [[META5]], i64 0} +// NCRDIV: [[META18]] = !{!"float", [[META6]], i64 0} // NCRDIV: [[DOUBLE_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} -// NCRDIV: [[META20]] = !{!"double", [[META5]], i64 0} -// NCRDIV: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} -// NCRDIV: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} -// NCRDIV: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} -// NCRDIV: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} +// NCRDIV: [[META20]] = !{!"double", [[META6]], i64 0} +// NCRDIV: [[LOOP21]] = distinct !{[[LOOP21]], [[META10]], [[META11]]} +// NCRDIV: [[LOOP22]] = distinct !{[[LOOP22]], [[META10]], [[META11]]} +// NCRDIV: [[LOOP23]] = distinct !{[[LOOP23]], [[META10]], [[META11]]} +// NCRDIV: [[LOOP24]] = distinct !{[[LOOP24]], [[META10]], [[META11]]} // NCRDIV: [[META25]] = !{float 3.000000e+00} -// NCRDIV: [[LOOP26]] = distinct !{[[LOOP26]], [[META8]], [[META9]]} -// NCRDIV: [[LOOP27]] = distinct !{[[LOOP27]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP26]] = distinct !{[[LOOP26]], [[META10]], [[META11]]} +// NCRDIV: [[LOOP27]] = distinct !{[[LOOP27]], [[META10]], [[META11]]} //. 
-// AMDGCNSPIRV: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} -// AMDGCNSPIRV: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} -// AMDGCNSPIRV: [[META7]] = !{!"Simple C++ TBAA"} -// AMDGCNSPIRV: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]} -// AMDGCNSPIRV: [[META9]] = !{!"llvm.loop.mustprogress"} -// AMDGCNSPIRV: [[META10]] = !{!"llvm.loop.unroll.disable"} -// AMDGCNSPIRV: [[LOOP11]] = distinct !{[[LOOP11]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} -// AMDGCNSPIRV: [[META14]] = !{!"int", [[META6]], i64 0} -// AMDGCNSPIRV: [[LOOP15]] = distinct !{[[LOOP15]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP16]] = distinct !{[[LOOP16]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// AMDGCNSPIRV: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} +// AMDGCNSPIRV: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// AMDGCNSPIRV: [[META8]] = !{!"Simple C++ TBAA"} +// AMDGCNSPIRV: [[CHAR_TBAA9]] = !{[[META7]], [[META7]], i64 0} +// AMDGCNSPIRV: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]], [[META12:![0-9]+]]} +// AMDGCNSPIRV: [[META11]] = !{!"llvm.loop.mustprogress"} +// AMDGCNSPIRV: [[META12]] = !{!"llvm.loop.unroll.disable"} +// AMDGCNSPIRV: [[LOOP13]] = distinct !{[[LOOP13]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP14]] = distinct !{[[LOOP14]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP15]] = distinct !{[[LOOP15]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP16]] = distinct !{[[LOOP16]], [[META11]], [[META12]]} // AMDGCNSPIRV: [[FLOAT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -// AMDGCNSPIRV: [[META18]] = !{!"float", [[META6]], i64 0} +// AMDGCNSPIRV: [[META18]] = !{!"float", [[META7]], i64 0} // AMDGCNSPIRV: [[DOUBLE_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} -// AMDGCNSPIRV: [[META20]] 
= !{!"double", [[META6]], i64 0} -// AMDGCNSPIRV: [[LOOP21]] = distinct !{[[LOOP21]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP22]] = distinct !{[[LOOP22]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP23]] = distinct !{[[LOOP23]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP24]] = distinct !{[[LOOP24]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP25]] = distinct !{[[LOOP25]], [[META9]], [[META10]]} -// AMDGCNSPIRV: [[LOOP26]] = distinct !{[[LOOP26]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[META20]] = !{!"double", [[META7]], i64 0} +// AMDGCNSPIRV: [[LOOP21]] = distinct !{[[LOOP21]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP22]] = distinct !{[[LOOP22]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP23]] = distinct !{[[LOOP23]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP24]] = distinct !{[[LOOP24]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP25]] = distinct !{[[LOOP25]], [[META11]], [[META12]]} +// AMDGCNSPIRV: [[LOOP26]] = distinct !{[[LOOP26]], [[META11]], [[META12]]} //. 
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index 03f20c5988268..2545a014e4340 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -10,7 +10,7 @@ // CHECK-LABEL: define hidden <4 x i32> @test_v128_load( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_v128_load(const void *mem) { @@ -20,7 +20,7 @@ v128_t test_v128_load(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load8_splat( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32> @@ -33,7 +33,7 @@ v128_t test_v128_load8_splat(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load16_splat( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer // 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32> @@ -46,7 +46,7 @@ v128_t test_v128_load16_splat(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_splat( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]] @@ -58,7 +58,7 @@ v128_t test_v128_load32_splat(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_splat( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32> @@ -71,7 +71,7 @@ v128_t test_v128_load64_splat(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_i16x8_load8x8( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x 
i16> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -83,7 +83,7 @@ v128_t test_i16x8_load8x8(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_u16x8_load8x8( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -95,7 +95,7 @@ v128_t test_u16x8_load8x8(const void *mem) { // CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_load16x4( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] // @@ -106,7 +106,7 @@ v128_t test_i32x4_load16x4(const void *mem) { // CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_load16x4( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] // @@ -117,7 +117,7 @@ v128_t test_u32x4_load16x4(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_i64x2_load32x2( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -129,7 +129,7 @@ v128_t test_i64x2_load32x2(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_u64x2_load32x2( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -141,7 +141,7 @@ v128_t test_u64x2_load32x2(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_zero( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i64 0 // CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]] // @@ -152,7 +152,7 @@ v128_t test_v128_load32_zero(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_zero( // CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa 
[[CHAR_TBAA6]] // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -164,7 +164,7 @@ v128_t test_v128_load64_zero(const void *mem) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load8_lane( // CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <16 x i8> // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i64 15 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32> @@ -177,7 +177,7 @@ v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load16_lane( // CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <8 x i16> // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i64 7 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32> @@ -190,7 +190,7 @@ v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_lane( // CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[TMP0:%.*]] = load i32, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC]], i32 [[TMP0]], i64 3 // CHECK-NEXT: ret <4 x i32> [[VECINS_I]] // @@ -201,7 +201,7 @@ v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) { // CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_lane( // CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <2 x i64> // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i64 1 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32> @@ -214,7 +214,7 @@ v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) { // CHECK-LABEL: define hidden void @test_v128_store( // CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 16)) [[MEM:%.*]], <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store <4 x i32> [[A]], ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x i32> [[A]], ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // void test_v128_store(void *mem, v128_t a) { @@ -226,7 +226,7 @@ void test_v128_store(void *mem, v128_t a) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <16 x i8> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15 -// CHECK-NEXT: store i8 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store i8 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // void test_v128_store8_lane(uint8_t *ptr, v128_t vec) { @@ -238,7 +238,7 @@ void test_v128_store8_lane(uint8_t 
*ptr, v128_t vec) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <8 x i16> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7 -// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // void test_v128_store16_lane(uint16_t *ptr, v128_t vec) { @@ -249,7 +249,7 @@ void test_v128_store16_lane(uint16_t *ptr, v128_t vec) { // CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 4)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC]], i64 3 -// CHECK-NEXT: store i32 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store i32 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // void test_v128_store32_lane(uint32_t *ptr, v128_t vec) { @@ -261,7 +261,7 @@ void test_v128_store32_lane(uint32_t *ptr, v128_t vec) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <2 x i64> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 -// CHECK-NEXT: store i64 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store i64 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // void test_v128_store64_lane(uint64_t *ptr, v128_t vec) { @@ -3466,7 +3466,7 @@ v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) { return wasm_i16x8_q15mulr_sat(a, b); } //. 
-// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} //. diff --git a/clang/test/OpenMP/bug54082.c b/clang/test/OpenMP/bug54082.c index ef3e7153545bf..1a98e122b84ca 100644 --- a/clang/test/OpenMP/bug54082.c +++ b/clang/test/OpenMP/bug54082.c @@ -72,7 +72,7 @@ void foo() { // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) [[X_TRAITS]], ptr noundef nonnull align 16 dereferenceable(16) @__const.foo.x_traits, i64 16, i1 false) // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X_ALLOC]]) #[[ATTR5]] // CHECK-NEXT: [[CALL:%.*]] = call i64 @omp_init_allocator(i64 noundef 0, i32 noundef 1, ptr noundef nonnull [[X_TRAITS]]) #[[ATTR5]] -// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3:![0-9]+]] +// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA7:![0-9]+]] // CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr nonnull @[[GLOB2:[0-9]+]], i32 1, ptr nonnull @foo.omp_outlined, ptr nonnull [[X_ALLOC]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X_ALLOC]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X_TRAITS]]) #[[ATTR5]] @@ -87,23 +87,23 @@ void foo() { // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_LB]]) #[[ATTR5]] -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA7:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_UB]]) #[[ATTR5]] -// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]] +// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_STRIDE]]) #[[ATTR5]] -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA7]] +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]] -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3]] +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA7]] // CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP1]] to ptr // CHECK-NEXT: [[DOTX__VOID_ADDR:%.*]] = tail call ptr @__kmpc_alloc(i32 [[TMP0]], i64 8, ptr [[CONV]]) // CHECK-NEXT: call void 
@__kmpc_for_static_init_4(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr nonnull [[DOTOMP_IS_LAST]], ptr nonnull [[DOTOMP_LB]], ptr nonnull [[DOTOMP_UB]], ptr nonnull [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 1023) -// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3]] +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA7]] // CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr [[CONV5]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]] @@ -113,10 +113,10 @@ void foo() { // CHECK-NEXT: ret void // //. -// CHECK: [[LONG_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} -// CHECK: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} +// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[INT_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// CHECK: [[META8]] = !{!"int", [[META5]], i64 0} +// CHECK: [[LONG_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"long", [[META5]], i64 0} //. 
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 46c87eb31969d..4b99d7766c873 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -36,14 +36,14 @@ void test() { // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA6:![0-9]+]] +// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA10:![0-9]+]] // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] // CHECK1: [[USER_CODE_ENTRY]]: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA10:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA6:![0-9]+]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void @@ -73,43 +73,43 @@ void test() { // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void 
@llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 // CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] // CHECK1: [[OMP_INNER_FOR_COND]]: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] // CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] // CHECK1: [[OMP_INNER_FOR_BODY]]: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr 
[[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[FLOAT_TBAA14:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]] @@ -117,13 +117,13 @@ void test() { // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[ANYPTR_TBAA16:![0-9]+]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 @@ -135,9 +135,9 @@ void test() { // CHECK1: [[OMP_BODY_CONTINUE]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] // CHECK1: [[OMP_INNER_FOR_INC]]: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa 
[[INT_TBAA10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] // CHECK1: [[OMP_INNER_FOR_END]]: // CHECK1-NEXT: br label %[[OMP_LOOP_EXIT:.*]] @@ -207,38 +207,38 @@ void test() { // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa 
[[INT_TBAA6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]] // CHECK1: [[OMP_PRECOND_THEN]]: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[FLOAT_TBAA14]] @@ -249,27 +249,27 @@ void test() { // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label %[[OMP_DISPATCH_COND:.*]] // CHECK1: [[OMP_DISPATCH_COND]]: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE]] ], [ [[TMP16]], %[[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]] // 
CHECK1-NEXT: br i1 [[CMP10]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_CLEANUP:.*]] @@ -278,26 +278,26 @@ void test() { // CHECK1: [[OMP_DISPATCH_BODY]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] // CHECK1: [[OMP_INNER_FOR_COND]]: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]] // CHECK1-NEXT: br i1 [[CMP12]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] // CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] // CHECK1: [[OMP_INNER_FOR_BODY]]: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 
[[TMP24]] to float // CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to float // CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] @@ -309,25 +309,25 @@ void test() { // CHECK1: [[OMP_BODY_CONTINUE]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] // CHECK1: [[OMP_INNER_FOR_INC]]: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] // CHECK1: [[OMP_INNER_FOR_END]]: // CHECK1-NEXT: br label %[[OMP_DISPATCH_INC:.*]] // CHECK1: [[OMP_DISPATCH_INC]]: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// 
CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_DISPATCH_COND]] // CHECK1: [[OMP_DISPATCH_END]]: // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8 @@ -508,16 +508,16 @@ void test() { // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[SHORT_TBAA27:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]] // CHECK1-NEXT: ret void // @@ -528,14 +528,14 @@ void test() { // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] // CHECK1: [[USER_CODE_ENTRY]]: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr 
[[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void @@ -565,43 +565,43 @@ void test() { // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 16) // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 // CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] // CHECK1: [[OMP_INNER_FOR_COND]]: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] // CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] // CHECK1: 
[[OMP_INNER_FOR_BODY]]: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[DOUBLE_TBAA29:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]] @@ -609,13 +609,13 @@ void test() { // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[ANYPTR_TBAA16]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 @@ -627,9 +627,9 @@ void test() { // CHECK1: [[OMP_BODY_CONTINUE]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] // CHECK1: [[OMP_INNER_FOR_INC]]: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] // CHECK1: [[OMP_INNER_FOR_END]]: // CHECK1-NEXT: br label %[[OMP_LOOP_EXIT:.*]] @@ -699,38 +699,38 @@ void test() { // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP1]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], 
[[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]] // CHECK1: [[OMP_PRECOND_THEN]]: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[DOUBLE_TBAA29]] @@ -741,27 +741,27 @@ void test() { // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, 
!tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label %[[OMP_DISPATCH_COND:.*]] // CHECK1: [[OMP_DISPATCH_COND]]: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE]] ], [ [[TMP16]], %[[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa 
[[INT_TBAA6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]] // CHECK1-NEXT: br i1 [[CMP10]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_CLEANUP:.*]] @@ -770,26 +770,26 @@ void test() { // CHECK1: [[OMP_DISPATCH_BODY]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] // CHECK1: [[OMP_INNER_FOR_COND]]: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]] // CHECK1-NEXT: br i1 [[CMP12]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] // CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] // CHECK1: [[OMP_INNER_FOR_BODY]]: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], 
ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double // CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to double // CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] @@ -801,25 +801,25 @@ void test() { // CHECK1: [[OMP_BODY_CONTINUE]]: // CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] // CHECK1: [[OMP_INNER_FOR_INC]]: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] // CHECK1: [[OMP_INNER_FOR_END]]: // CHECK1-NEXT: br label %[[OMP_DISPATCH_INC:.*]] // CHECK1: [[OMP_DISPATCH_INC]]: -// CHECK1-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: br label %[[OMP_DISPATCH_COND]] // CHECK1: [[OMP_DISPATCH_END]]: // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8 @@ -1013,16 +1013,16 @@ void test() { // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, 
!tbaa [[SHORT_TBAA27]] -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA6]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA10]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]] // CHECK1-NEXT: ret void // @@ -1113,22 +1113,22 @@ void test() { // CHECK1-NEXT: ret double [[TMP0]] // //. 
-// CHECK1: [[ANYPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} -// CHECK1: [[META7]] = !{!"any pointer", [[META8:![0-9]+]], i64 0} +// CHECK1: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK1: [[META7]] = !{!"int", [[META8:![0-9]+]], i64 0} // CHECK1: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} // CHECK1: [[META9]] = !{!"Simple C++ TBAA"} -// CHECK1: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} -// CHECK1: [[META11]] = !{!"int", [[META8]], i64 0} +// CHECK1: [[ANYPTR_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK1: [[META11]] = !{!"any pointer", [[META8]], i64 0} // CHECK1: [[INTPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} -// CHECK1: [[META13]] = !{!"p1 int", [[META7]], i64 0} +// CHECK1: [[META13]] = !{!"p1 int", [[META11]], i64 0} // CHECK1: [[FLOAT_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} // CHECK1: [[META15]] = !{!"float", [[META8]], i64 0} // CHECK1: [[ANYPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} -// CHECK1: [[META17]] = !{!"any p2 pointer", [[META7]], i64 0} +// CHECK1: [[META17]] = !{!"any p2 pointer", [[META11]], i64 0} // CHECK1: [[_ZTSST7COMPLEXIFEPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -// CHECK1: [[META19]] = !{!"p1 _ZTSSt7complexIfE", [[META7]], i64 0} +// CHECK1: [[META19]] = !{!"p1 _ZTSSt7complexIfE", [[META11]], i64 0} // CHECK1: [[FLOATPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} -// CHECK1: [[META21]] = !{!"p1 float", [[META7]], i64 0} +// CHECK1: [[META21]] = !{!"p1 float", [[META11]], i64 0} // CHECK1: [[META22]] = !{} // CHECK1: [[META23]] = !{i64 4} // CHECK1: [[FLOAT_TBAA24]] = !{[[META25:![0-9]+]], [[META15]], i64 0} @@ -1139,9 +1139,9 @@ void test() { // CHECK1: [[DOUBLE_TBAA29]] = !{[[META30:![0-9]+]], [[META30]], i64 0} // CHECK1: [[META30]] = !{!"double", [[META8]], i64 0} // CHECK1: [[_ZTSST7COMPLEXIDEPTR_TBAA31]] = !{[[META32:![0-9]+]], [[META32]], i64 0} -// CHECK1: [[META32]] = !{!"p1 _ZTSSt7complexIdE", 
[[META7]], i64 0} +// CHECK1: [[META32]] = !{!"p1 _ZTSSt7complexIdE", [[META11]], i64 0} // CHECK1: [[DOUBLEPTR_TBAA33]] = !{[[META34:![0-9]+]], [[META34]], i64 0} -// CHECK1: [[META34]] = !{!"p1 double", [[META7]], i64 0} +// CHECK1: [[META34]] = !{!"p1 double", [[META11]], i64 0} // CHECK1: [[META35]] = !{i64 8} // CHECK1: [[DOUBLE_TBAA36]] = !{[[META37:![0-9]+]], [[META30]], i64 0} // CHECK1: [[META37]] = !{!"_ZTSSt7complexIdE", [[META30]], i64 0, [[META30]], i64 8} From 822c291aac4bb1cddb8e7dae183ad8b9fbcac81c Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 21 Oct 2025 10:49:27 +0100 Subject: [PATCH 17/99] [LV][NFC] Remove undef from phi incoming values (#163762) Split off from PR #163525, this standalone patch replaces use of undef as incoming PHI values with zero, in order to reduce the likelihood of contributors hitting the `undef deprecator` warning in github. --- .../LoopVectorize/2012-10-20-infloop.ll | 2 +- .../LoopVectorize/AArch64/pr33053.ll | 14 ++---- .../LoopVectorize/AArch64/pr36032.ll | 4 +- .../AArch64/sve-interleaved-accesses.ll | 8 ++-- .../LoopVectorize/ARM/arm-ieee-vectorize.ll | 34 ++++++-------- .../LoopVectorize/ARM/mve-known-trip-count.ll | 2 +- .../LoopVectorize/X86/fp80-widest-type.ll | 4 +- .../Transforms/LoopVectorize/X86/rauw-bug.ll | 8 ++-- .../X86/x86_fp80-interleaved-access.ll | 2 +- .../Transforms/LoopVectorize/i8-induction.ll | 2 +- .../LoopVectorize/if-pred-stores.ll | 4 +- .../LoopVectorize/incorrect-dom-info.ll | 4 +- .../LoopVectorize/interleaved-accesses.ll | 8 ++-- .../LoopVectorize/middle-block-dbg.ll | 2 +- .../LoopVectorize/multi-use-reduction-bug.ll | 4 +- .../LoopVectorize/reduction-inloop.ll | 42 ++++++++--------- .../Transforms/LoopVectorize/reverse_iter.ll | 10 ++-- .../scalable-first-order-recurrence.ll | 46 +++++++++---------- .../LoopVectorize/scev-exitlim-crash.ll | 4 +- ...tion-start-value-may-be-undef-or-poison.ll | 4 +- .../Transforms/LoopVectorize/uniform-blend.ll | 6 +-- 21 files changed, 100 
insertions(+), 114 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll index 2b5960eb91e50..90d0db2a9acff 100644 --- a/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll +++ b/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll @@ -56,7 +56,7 @@ if.then46: ; preds = %for.body40 br label %for.inc50 for.inc50: ; preds = %if.then46, %for.body40 - %k.1 = phi i32 [ undef, %for.body40 ], [ %inc47, %if.then46 ] + %k.1 = phi i32 [ 0, %for.body40 ], [ %inc47, %if.then46 ] %step.1 = phi i32 [ %step.0121, %for.body40 ], [ %inc47, %if.then46 ] %indvars.iv.next124 = add i64 %indvars.iv123, 1 %lftr.wideiv = trunc i64 %indvars.iv.next124 to i32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll index 20b536499afa7..ebf4a4fabb605 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll @@ -9,9 +9,7 @@ define i32 @fn1() local_unnamed_addr #0 { ; We expect the backend to expand all reductions. 
; CHECK: @llvm.vector.reduce entry: - %0 = load i32, ptr @b, align 4, !tbaa !1 - %cmp40 = icmp sgt i32 %0, 0 - br i1 %cmp40, label %for.body.lr.ph, label %for.end + br label %for.body.lr.ph for.body.lr.ph: ; preds = %entry %1 = load ptr, ptr @a, align 8, !tbaa !5 @@ -21,8 +19,8 @@ for.body.lr.ph: ; preds = %entry for.body: ; preds = %for.body.lr.ph, %for.body %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] - %d.043 = phi i16 [ undef, %for.body.lr.ph ], [ %.sink28, %for.body ] - %c.042 = phi i16 [ undef, %for.body.lr.ph ], [ %c.0., %for.body ] + %d.043 = phi i16 [ 0, %for.body.lr.ph ], [ %.sink28, %for.body ] + %c.042 = phi i16 [ 0, %for.body.lr.ph ], [ %c.0., %for.body ] %arrayidx = getelementptr inbounds i16, ptr %1, i64 %indvars.iv %4 = load i16, ptr %arrayidx, align 2, !tbaa !7 %cmp2 = icmp sgt i16 %c.042, %4 @@ -33,10 +31,8 @@ for.body: ; preds = %for.body.lr.ph, %fo %cmp = icmp slt i64 %indvars.iv.next, %3 br i1 %cmp, label %for.body, label %for.end -for.end: ; preds = %for.body, %entry - %c.0.lcssa = phi i16 [ undef, %entry ], [ %c.0., %for.body ] - %d.0.lcssa = phi i16 [ undef, %entry ], [ %.sink28, %for.body ] - %cmp26 = icmp sgt i16 %c.0.lcssa, %d.0.lcssa +for.end: ; preds = %for.body + %cmp26 = icmp sgt i16 %c.0., %.sink28 %conv27 = zext i1 %cmp26 to i32 ret i32 %conv27 } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll index 44820e061211a..33ce300d68592 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -18,7 +18,7 @@ define void @_Z1dv() local_unnamed_addr #0 { ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ] -; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[G_1_LCSSA:%.*]], [[FOR_COND_CLEANUP]] ] +; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], 
[ [[G_1_LCSSA:%.*]], [[FOR_COND_CLEANUP]] ] ; CHECK-NEXT: [[CMP12:%.*]] = icmp ult i32 [[G_0]], 4 ; CHECK-NEXT: [[CONV:%.*]] = and i32 [[F_0]], 65535 ; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP]] @@ -50,7 +50,7 @@ entry: for.cond: ; preds = %for.cond.cleanup, %entry %f.0 = phi i32 [ 0, %entry ], [ %add5, %for.cond.cleanup ] - %g.0 = phi i32 [ undef, %entry ], [ %g.1.lcssa, %for.cond.cleanup ] + %g.0 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %for.cond.cleanup ] %cmp12 = icmp ult i32 %g.0, 4 %conv = and i32 %f.0, 65535 br i1 %cmp12, label %for.body.lr.ph, label %for.cond.cleanup diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index de804600f811a..786a2aab6d0e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -728,8 +728,8 @@ define void @int_float_struct(ptr nocapture readonly %p) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, float undef, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ insertelement ( zeroinitializer, i32 undef, i32 0), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_INTFLOAT:%.*]], ptr [[P:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) @@ -755,8 
+755,8 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ] - %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ] + %SumB.014 = phi float [ 0.0e+00, %entry ], [ %add3, %for.body ] + %SumA.013 = phi i32 [ 0, %entry ], [ %add, %for.body ] %a = getelementptr inbounds %struct.IntFloat, ptr %p, i64 %indvars.iv, i32 0 %load1 = load i32, ptr %a, align 4 %add = add nsw i32 %load1, %SumA.013 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll index 44a48a9c262f5..0f398a69a4bc7 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -84,15 +84,14 @@ for.end: ; preds = %for.end.loopexit, % ; CHECK: We can vectorize this loop! define i32 @redi(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: - %cmp5 = icmp eq i32 %N, 0 - br i1 %cmp5, label %for.end, label %for.body.preheader + br label %for.body.preheader for.body.preheader: ; preds = %entry br label %for.body for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ] + %Red.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07 %0 = load i32, ptr %arrayidx, align 4 %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07 @@ -107,9 +106,8 @@ for.end.loopexit: ; preds = %for.body %add.lcssa = phi i32 [ %add, %for.body ] br label %for.end -for.end: ; preds = %for.end.loopexit, %entry - %Red.0.lcssa = phi i32 [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ] - ret i32 %Red.0.lcssa +for.end: ; preds = %for.end.loopexit + ret i32 %add.lcssa } ; Floating-point loops 
need fast-math to be vectorizeable @@ -121,15 +119,14 @@ for.end: ; preds = %for.end.loopexit, % ; DARWIN: We can vectorize this loop! define float @redf(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: - %cmp5 = icmp eq i32 %N, 0 - br i1 %cmp5, label %for.end, label %for.body.preheader + br label %for.body.preheader for.body.preheader: ; preds = %entry br label %for.body for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ] + %Red.06 = phi float [ %add, %for.body ], [ 0.0e+00, %for.body.preheader ] %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07 %0 = load float, ptr %arrayidx, align 4 %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07 @@ -144,9 +141,8 @@ for.end.loopexit: ; preds = %for.body %add.lcssa = phi float [ %add, %for.body ] br label %for.end -for.end: ; preds = %for.end.loopexit, %entry - %Red.0.lcssa = phi float [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ] - ret float %Red.0.lcssa +for.end: ; preds = %for.end.loopexit + ret float %add.lcssa } ; Make sure calls that turn into builtins are also covered @@ -252,7 +248,7 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ] + %Red.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07 %0 = load i32, ptr %arrayidx, align 4 %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07 @@ -268,7 +264,7 @@ for.end.loopexit: ; preds = %for.body br label %for.end for.end: ; preds = %for.end.loopexit, %entry - %Red.0.lcssa = phi i32 [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ] + %Red.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ] ret i32 %Red.0.lcssa } @@ 
-277,15 +273,14 @@ for.end: ; preds = %for.end.loopexit, % ; CHECK: We can vectorize this loop! define float @redf_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: - %cmp5 = icmp eq i32 %N, 0 - br i1 %cmp5, label %for.end, label %for.body.preheader + br label %for.body.preheader for.body.preheader: ; preds = %entry br label %for.body for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ] + %Red.06 = phi float [ %add, %for.body ], [ 0.0e+00, %for.body.preheader ] %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07 %0 = load float, ptr %arrayidx, align 4 %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07 @@ -300,9 +295,8 @@ for.end.loopexit: ; preds = %for.body %add.lcssa = phi float [ %add, %for.body ] br label %for.end -for.end: ; preds = %for.end.loopexit, %entry - %Red.0.lcssa = phi float [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ] - ret float %Red.0.lcssa +for.end: ; preds = %for.end.loopexit + ret float %add.lcssa } ; Make sure calls that turn into builtins are also covered diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll index fe3504bc4b679..23609b1041ae2 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll @@ -249,7 +249,7 @@ define dso_local i32 @predicated_test(i32 noundef %0, ptr %glob) #0 { br label %7 7: ; preds = %5, %155 - %8 = phi i32 [ %10, %155 ], [ undef, %5 ] + %8 = phi i32 [ %10, %155 ], [ 0, %5 ] %9 = phi i32 [ %156, %155 ], [ 0, %5 ] %10 = shl i32 %8, 4 store i32 %10, ptr %6, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll b/llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll index 2ef9d4b40d9a5..3718ad23c0612 100644 --- 
a/llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll @@ -14,7 +14,7 @@ define x86_fp80 @test() { ; CHECK-NEXT: br label [[FOR_BODY3_I_3:%.*]] ; CHECK: for.body3.i.3: ; CHECK-NEXT: [[N_ADDR_112_I_3:%.*]] = phi i64 [ [[DEC_I_3:%.*]], [[FOR_BODY3_I_3]] ], [ 24, [[FOO_EXIT:%.*]] ] -; CHECK-NEXT: [[X_ADDR_111_I_3:%.*]] = phi x86_fp80 [ [[MUL_I_3:%.*]], [[FOR_BODY3_I_3]] ], [ undef, [[FOO_EXIT]] ] +; CHECK-NEXT: [[X_ADDR_111_I_3:%.*]] = phi x86_fp80 [ [[MUL_I_3:%.*]], [[FOR_BODY3_I_3]] ], [ 0xK00000000000000000000, [[FOO_EXIT]] ] ; CHECK-NEXT: [[MUL_I_3]] = fmul x86_fp80 [[X_ADDR_111_I_3]], 0xK40008000000000000000 ; CHECK-NEXT: [[DEC_I_3]] = add nsw i64 [[N_ADDR_112_I_3]], -1 ; CHECK-NEXT: [[CMP2_I_3:%.*]] = icmp sgt i64 [[N_ADDR_112_I_3]], 1 @@ -28,7 +28,7 @@ foo.exit: for.body3.i.3: ; preds = %for.body3.i.3, %foo.exit %n.addr.112.i.3 = phi i64 [ %dec.i.3, %for.body3.i.3 ], [ 24, %foo.exit ] - %x.addr.111.i.3 = phi x86_fp80 [ %mul.i.3, %for.body3.i.3 ], [ undef, %foo.exit ] + %x.addr.111.i.3 = phi x86_fp80 [ %mul.i.3, %for.body3.i.3 ], [ zeroinitializer, %foo.exit ] %mul.i.3 = fmul x86_fp80 %x.addr.111.i.3, 0xK40008000000000000000 %dec.i.3 = add nsw i64 %n.addr.112.i.3, -1 %cmp2.i.3 = icmp sgt i64 %n.addr.112.i.3, 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/rauw-bug.ll b/llvm/test/Transforms/LoopVectorize/X86/rauw-bug.ll index df1c4f979986c..5321d69a9f5f5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/rauw-bug.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/rauw-bug.ll @@ -21,10 +21,10 @@ while.cond63.preheader.while.end76_crit_edge: ret void while.body: - %d2_fx.015 = phi double [ %sub52, %while.body ], [ undef, %entry ] - %d2_fy.014 = phi double [ %sub58, %while.body ], [ undef, %entry ] - %d3_fy.013 = phi double [ %div56, %while.body ], [ undef, %entry ] - %d3_fx.012 = phi double [ %div50, %while.body ], [ undef, %entry ] + %d2_fx.015 = phi double [ %sub52, %while.body ], [ 
0.0e+00, %entry ] + %d2_fy.014 = phi double [ %sub58, %while.body ], [ 0.0e+00, %entry ] + %d3_fy.013 = phi double [ %div56, %while.body ], [ 0.0e+00, %entry ] + %d3_fx.012 = phi double [ %div50, %while.body ], [ 0.0e+00, %entry ] %div50 = fmul double %d3_fx.012, 1.250000e-01 %sub52 = fsub double 0.000000e+00, %div50 %div56 = fmul double %d3_fy.013, 1.250000e-01 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll index 368361fd760ec..0f55d79b2d299 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-interleaved-access.ll @@ -15,7 +15,7 @@ for.cond.cleanup: for.body: %i.09 = phi i16 [ 0, %entry ], [ %add3, %for.body ] - %res.08 = phi x86_fp80 [ undef, %entry ], [ %3, %for.body ] + %res.08 = phi x86_fp80 [ zeroinitializer, %entry ], [ %3, %for.body ] %arrayidx = getelementptr inbounds x86_fp80, ptr %a, i16 %i.09 %0 = load x86_fp80, ptr %arrayidx, align 1 %add = or i16 %i.09, 1 diff --git a/llvm/test/Transforms/LoopVectorize/i8-induction.ll b/llvm/test/Transforms/LoopVectorize/i8-induction.ll index 220fd64e6a829..712c75d3ed042 100644 --- a/llvm/test/Transforms/LoopVectorize/i8-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/i8-induction.ll @@ -20,7 +20,7 @@ scalar.ph: for.body: %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ] ; <------- i8 induction var. 
- %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ] + %c.015 = phi i8 [ 0, %scalar.ph ], [ %conv8, %for.body ] %conv2 = sext i8 %c.015 to i32 %tobool = icmp ne i8 %c.015, 0 %.sink = select i1 %tobool, i8 %c.015, i8 %0 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index f7376a0f8e205..c164c4a46bd94 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -277,7 +277,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]] ; UNROLL-NOSIMPLIFY: for.inc26: -; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY9]] ], [ [[INEWCHUNKS_2_LCSSA]], [[FOR_INC26_LOOPEXIT]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_1_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY9]] ], [ [[INEWCHUNKS_2_LCSSA]], [[FOR_INC26_LOOPEXIT]] ] ; UNROLL-NOSIMPLIFY-NEXT: unreachable ; ; VEC-LABEL: @bug18724( @@ -376,7 +376,7 @@ for.inc23: br i1 %cmp13, label %for.body14, label %for.inc26 for.inc26: - %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ] + %iNewChunks.1.lcssa = phi i32 [ 0, %for.body9 ], [ %iNewChunks.2, %for.inc23 ] unreachable } diff --git a/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll index 9e750022d8c4c..5cf99b8998603 100644 --- a/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll +++ b/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll @@ -53,8 +53,8 @@ thread-pre-split.loopexit: ; preds = %11, %.thread-pre-sp br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21 .lr.ph21: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader - %d.020 = phi ptr [ undef, %26 ], [ 
%d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ] - %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ] + %d.020 = phi ptr [ zeroinitializer, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ zeroinitializer, %thread-pre-split.preheader ] + %10 = phi i64 [ %28, %26 ], [ zeroinitializer, %thread-pre-split.loopexit ], [ zeroinitializer, %thread-pre-split.preheader ] br i1 %arg, label %11, label %22 ;