From 304e520883415d2bd63954d1d7bd6f1069841537 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 1 Jul 2025 23:41:07 +0800 Subject: [PATCH] [CHERIOT] Add a TTI callback to disallow LSR base register expressions. This allows us to turn on SCEV for cheriot, with the caveat that we need to disallow LSR base register expressions that are negatively indexed, since they may not be representable on cheriot. --- .../llvm/Analysis/TargetTransformInfo.h | 6 +++ .../llvm/Analysis/TargetTransformInfoImpl.h | 2 + llvm/lib/Analysis/ScalarEvolution.cpp | 6 --- llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++ .../Target/RISCV/RISCVTargetTransformInfo.cpp | 21 +++++++++ .../Target/RISCV/RISCVTargetTransformInfo.h | 2 + .../Transforms/Scalar/LoopStrengthReduce.cpp | 4 ++ .../RISCV/cheri/calling-conv-il32pc64.ll | 3 +- .../RISCV/cheri/calling-conv-l64pc128.ll | 3 +- .../LoopIdiom/cheri-preserve-tags-memcpy.ll | 42 ++++++++--------- .../LoopIdiom/cheri-preserve-tags-store.ll | 26 ++-------- .../LoopStrengthReduce/cheriot-strlen.ll | 47 +++++++++++++++++++ 12 files changed, 110 insertions(+), 56 deletions(-) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/cheriot-strlen.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index f07a4aea34d29..1356dbe09ba86 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -787,6 +787,8 @@ class TargetTransformInfo { AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const; + bool isLegalBaseRegForLSR(const SCEV *) const; + /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; /// Return true if the target supports masked load. 
@@ -1996,6 +1998,7 @@ class TargetTransformInfo::Concept { TargetLibraryInfo *LibInfo) = 0; virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; + virtual bool isLegalBaseRegForLSR(const SCEV *) const = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -2534,6 +2537,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { ScalarEvolution *SE) const override { return Impl.getPreferredAddressingMode(L, SE); } + bool isLegalBaseRegForLSR(const SCEV *S) const override { + return Impl.isLegalBaseRegForLSR(S); + } bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index eee58dbc2c337..404cd6c22b1f3 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -276,6 +276,8 @@ class TargetTransformInfoImplBase { return TTI::AMK_None; } + bool isLegalBaseRegForLSR(const SCEV *S) const { return true; } + bool isLegalMaskedStore(Type *DataType, Align Alignment) const { return false; } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 3b6aa4b6f8356..36fe036aa9e9f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6296,12 +6296,6 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { assert(GEP->getSourceElementType()->isSized() && "GEP source element type must be sized"); - const DataLayout &DL = F.getParent()->getDataLayout(); - // FIXME: Ideally, we should teach Scalar Evolution to - // understand fat pointers. 
- if (DL.isFatPointer(GEP->getPointerOperandType()->getPointerAddressSpace())) - return getUnknown(GEP); - SmallVector<const SCEV *, 4> IndexExprs; for (Value *Index : GEP->indices()) IndexExprs.push_back(getSCEV(Index)); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8b9722d047edc..9b0f54ae6ece4 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -463,6 +463,10 @@ TargetTransformInfo::getPreferredAddressingMode(const Loop *L, return TTIImpl->getPreferredAddressingMode(L, SE); } +bool TargetTransformInfo::isLegalBaseRegForLSR(const SCEV *S) const { + return TTIImpl->isLegalBaseRegForLSR(S); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedStore(DataType, Alignment); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index add82dc80c429..da1a1103cf033 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -9,6 +9,7 @@ #include "RISCVTargetTransformInfo.h" #include "MCTargetDesc/RISCVMatInt.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" @@ -2401,6 +2402,26 @@ RISCVTTIImpl::getPreferredAddressingMode(const Loop *L, return BasicTTIImplBase::getPreferredAddressingMode(L, SE); } +bool RISCVTTIImpl::isLegalBaseRegForLSR(const SCEV *S) const { + if (ST->hasVendorXCheriot()) { + // Disallow any add-recurrence SCEV where the base offset is negative. + // This is needed because CHERIoT can't represent pointers before the + // beginning of an array. 
+ if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + const auto *StartAdd = dyn_cast<SCEVAddExpr>(AddRec->getStart()); + if (StartAdd) { + const auto *Offset = dyn_cast<SCEVConstant>(StartAdd->getOperand(0)); + if (Offset && Offset->getValue()->isNegative()) + return false; + Offset = dyn_cast<SCEVConstant>(StartAdd->getOperand(1)); + if (Offset && Offset->getValue()->isNegative()) + return false; + } + } + } + return BasicTTIImplBase::isLegalBaseRegForLSR(S); +} + bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) { // RISC-V specific here are "instruction number 1st priority". diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 119089f9cfb82..31ae6c9a3c3ea 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -394,6 +394,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> { TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const; + bool isLegalBaseRegForLSR(const SCEV *S) const; + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { if (Vector) return RISCVRegisterClass::VRRC; diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 2c3d64b0e07d9..a30b48550a69c 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1516,6 +1516,10 @@ void Cost::RateFormula(const Formula &F, return; } for (const SCEV *BaseReg : F.BaseRegs) { + if (!TTI->isLegalBaseRegForLSR(BaseReg)) { + Lose(); + return; + } if (VisitedRegs.count(BaseReg)) { Lose(); return; diff --git a/llvm/test/CodeGen/RISCV/cheri/calling-conv-il32pc64.ll b/llvm/test/CodeGen/RISCV/cheri/calling-conv-il32pc64.ll index 8f85e4e1c1152..c803b2af2464c 100644 --- a/llvm/test/CodeGen/RISCV/cheri/calling-conv-il32pc64.ll +++ b/llvm/test/CodeGen/RISCV/cheri/calling-conv-il32pc64.ll @@ -13,13 
+13,12 @@ define i32 @get_ith_word(i32 signext %i, ...) addrspace(200) nounwind { ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: .LBB0_1: # %while.cond ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmove ca2, ca1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: cincoffset ca1, ca1, 4 ; CHECK-NEXT: bgtz a0, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %while.end ; CHECK-NEXT: csc ca1, 8(csp) -; CHECK-NEXT: clw a0, 0(ca2) +; CHECK-NEXT: clw a0, -4(ca1) ; CHECK-NEXT: cincoffset csp, csp, 16 ; CHECK-NEXT: cret entry: diff --git a/llvm/test/CodeGen/RISCV/cheri/calling-conv-l64pc128.ll b/llvm/test/CodeGen/RISCV/cheri/calling-conv-l64pc128.ll index 5334d8536fb1d..0be0086340c65 100644 --- a/llvm/test/CodeGen/RISCV/cheri/calling-conv-l64pc128.ll +++ b/llvm/test/CodeGen/RISCV/cheri/calling-conv-l64pc128.ll @@ -13,13 +13,12 @@ define i32 @get_ith_word(i32 signext %i, ...) addrspace(200) nounwind { ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: .LBB0_1: # %while.cond ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmove ca2, ca1 ; CHECK-NEXT: addiw a0, a0, -1 ; CHECK-NEXT: cincoffset ca1, ca1, 4 ; CHECK-NEXT: bgtz a0, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %while.end ; CHECK-NEXT: csc ca1, 0(csp) -; CHECK-NEXT: clw a0, 0(ca2) +; CHECK-NEXT: clw a0, -4(ca1) ; CHECK-NEXT: cincoffset csp, csp, 16 ; CHECK-NEXT: cret entry: diff --git a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll index 706cea8c1f738..55660e16af202 100644 --- a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll +++ b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --force-update +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; Check that we retain the {must,no}_preserve_cheri_tags attribute when merging memcpy loops. 
; FIXME: this does not work as expected with addrspace(200) pointers yet since we need SCEV. ; RUN: sed -e 's/-A200-P200-G200//g' -e 's/.p200/.p0/g' %s | \ @@ -32,9 +32,9 @@ define void @no_preserve(ptr addrspace("A") noalias writeonly %dst, ptr addrspac ; HYBRID-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[COUNT]], 0 ; HYBRID-NEXT: br i1 [[CMP1]], label [[BB1_PREHEADER:%.*]], label [[BB2:%.*]] ; HYBRID: bb1.preheader: -; HYBRID-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 16 +; HYBRID-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 16 ; HYBRID-NEXT: [[TMP1:%.*]] = shl nuw i64 [[COUNT]], 4 -; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST]], ptr align 4 [[UGLYGEP]], i64 [[TMP1]], i1 false) #[[ATTR2:[0-9]+]] +; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST]], ptr align 4 [[SCEVGEP]], i64 [[TMP1]], i1 false) #[[ATTR2:[0-9]+]] ; HYBRID-NEXT: br label [[BB2]] ; HYBRID: bb2: ; HYBRID-NEXT: ret void @@ -42,15 +42,12 @@ define void @no_preserve(ptr addrspace("A") noalias writeonly %dst, ptr addrspac ; PURECAP-LABEL: define {{[^@]+}}@no_preserve ; PURECAP-SAME: (ptr addrspace(200) noalias writeonly [[DST:%.*]], ptr addrspace(200) noalias readonly [[SRC:%.*]], i64 [[COUNT:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0:[0-9]+]] { ; PURECAP-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[COUNT]], 0 -; PURECAP-NEXT: br i1 [[CMP1]], label [[BB1:%.*]], label [[BB2:%.*]] -; PURECAP: bb1: -; PURECAP-NEXT: [[IDX:%.*]] = phi i64 [ [[ADD:%.*]], [[BB1]] ], [ 0, [[TMP0:%.*]] ] -; PURECAP-NEXT: [[LDST:%.*]] = getelementptr [[STRUCT_WOMBAT:%.*]], ptr addrspace(200) [[DST]], i64 [[IDX]] -; PURECAP-NEXT: [[ADD]] = add nuw nsw i64 [[IDX]], 1 -; PURECAP-NEXT: [[LSRC:%.*]] = getelementptr [[STRUCT_WOMBAT]], ptr addrspace(200) [[SRC]], i64 [[ADD]] -; PURECAP-NEXT: tail call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 16 dereferenceable(16) [[LDST]], ptr addrspace(200) noundef nonnull align 4 dereferenceable(16) [[LSRC]], 
i64 16, i1 false) #[[ATTR2:[0-9]+]] -; PURECAP-NEXT: [[CMP2:%.*]] = icmp slt i64 [[ADD]], [[COUNT]] -; PURECAP-NEXT: br i1 [[CMP2]], label [[BB1]], label [[BB2]] +; PURECAP-NEXT: br i1 [[CMP1]], label [[BB1_PREHEADER:%.*]], label [[BB2:%.*]] +; PURECAP: bb1.preheader: +; PURECAP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(200) [[SRC]], i64 16 +; PURECAP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[COUNT]], 4 +; PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[DST]], ptr addrspace(200) align 4 [[SCEVGEP]], i64 [[TMP1]], i1 false) #[[ATTR2:[0-9]+]] +; PURECAP-NEXT: br label [[BB2]] ; PURECAP: bb2: ; PURECAP-NEXT: ret void ; @@ -82,9 +79,9 @@ define void @must_preserve(ptr addrspace("A") noalias writeonly %dst, ptr addrsp ; HYBRID-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[COUNT]], 0 ; HYBRID-NEXT: br i1 [[CMP1]], label [[BB1_PREHEADER:%.*]], label [[BB2:%.*]] ; HYBRID: bb1.preheader: -; HYBRID-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 16 +; HYBRID-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 16 ; HYBRID-NEXT: [[TMP1:%.*]] = shl nuw i64 [[COUNT]], 4 -; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST]], ptr align 4 [[UGLYGEP]], i64 [[TMP1]], i1 false) #[[ATTR3:[0-9]+]] +; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST]], ptr align 4 [[SCEVGEP]], i64 [[TMP1]], i1 false) #[[ATTR3:[0-9]+]] ; HYBRID-NEXT: br label [[BB2]] ; HYBRID: bb2: ; HYBRID-NEXT: ret void @@ -92,15 +89,12 @@ define void @must_preserve(ptr addrspace("A") noalias writeonly %dst, ptr addrsp ; PURECAP-LABEL: define {{[^@]+}}@must_preserve ; PURECAP-SAME: (ptr addrspace(200) noalias writeonly [[DST:%.*]], ptr addrspace(200) noalias readonly [[SRC:%.*]], i64 [[COUNT:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { ; PURECAP-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[COUNT]], 0 -; PURECAP-NEXT: br i1 [[CMP1]], label [[BB1:%.*]], label [[BB2:%.*]] -; PURECAP: bb1: -; PURECAP-NEXT: [[IDX:%.*]] = phi i64 [ [[ADD:%.*]], 
[[BB1]] ], [ 0, [[TMP0:%.*]] ] -; PURECAP-NEXT: [[LDST:%.*]] = getelementptr [[STRUCT_WOMBAT:%.*]], ptr addrspace(200) [[DST]], i64 [[IDX]] -; PURECAP-NEXT: [[ADD]] = add nuw nsw i64 [[IDX]], 1 -; PURECAP-NEXT: [[LSRC:%.*]] = getelementptr [[STRUCT_WOMBAT]], ptr addrspace(200) [[SRC]], i64 [[ADD]] -; PURECAP-NEXT: tail call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 16 dereferenceable(16) [[LDST]], ptr addrspace(200) noundef nonnull align 4 dereferenceable(16) [[LSRC]], i64 16, i1 false) #[[ATTR3:[0-9]+]] -; PURECAP-NEXT: [[CMP2:%.*]] = icmp slt i64 [[ADD]], [[COUNT]] -; PURECAP-NEXT: br i1 [[CMP2]], label [[BB1]], label [[BB2]] +; PURECAP-NEXT: br i1 [[CMP1]], label [[BB1_PREHEADER:%.*]], label [[BB2:%.*]] +; PURECAP: bb1.preheader: +; PURECAP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(200) [[SRC]], i64 16 +; PURECAP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[COUNT]], 4 +; PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[DST]], ptr addrspace(200) align 4 [[SCEVGEP]], i64 [[TMP1]], i1 false) #[[ATTR3:[0-9]+]] +; PURECAP-NEXT: br label [[BB2]] ; PURECAP: bb2: ; PURECAP-NEXT: ret void ; @@ -136,3 +130,5 @@ attributes #2 = { must_preserve_cheri_tags } ; CHECK: attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR2]] = { no_preserve_cheri_tags } ; CHECK: attributes #[[ATTR3]] = { must_preserve_cheri_tags } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll index b99cd3366dcc7..5d03e1dd8d8bb 100644 --- a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll +++ b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --force-update +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; We should be setting no_preserve_cheri_tags for loops that copy integers, and must_preserve_cheri_tags for capability copies. ; FIXME: this does not work with addrspace(200) pointers yet since we need SCEV. ; RUN: sed -e 's/-A200-P200-G200//g' %s | opt --passes='require,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S | \ @@ -24,17 +24,7 @@ define void @get_state(ptr addrspace("A") nocapture noalias %state) addrspace("P ; PURECAP-LABEL: define {{[^@]+}}@get_state ; PURECAP-SAME: (ptr addrspace(200) noalias nocapture [[STATE:%.*]]) addrspace(200) { ; PURECAP-NEXT: entry: -; PURECAP-NEXT: br label [[FOR_BODY:%.*]] -; PURECAP: for.body: -; PURECAP-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; PURECAP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [25 x i32], ptr addrspace(200) @nocap, i64 0, i64 [[I_08]] -; PURECAP-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(200) [[ARRAYIDX]], align 4 -; PURECAP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_STATE:%.*]], ptr addrspace(200) [[STATE]], i64 0, i32 0, i64 [[I_08]] -; PURECAP-NEXT: store i32 [[TMP0]], ptr addrspace(200) [[ARRAYIDX2]], align 4 -; PURECAP-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; PURECAP-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INC]], 25 -; PURECAP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; PURECAP: for.end: +; 
PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 [[STATE]], ptr addrspace(200) align 4 @nocap, i64 100, i1 false) #[[ATTR1:[0-9]+]] ; PURECAP-NEXT: ret void ; entry: @@ -67,17 +57,7 @@ define void @get_cap_state(ptr addrspace("A") nocapture noalias %state) addrspac ; PURECAP-LABEL: define {{[^@]+}}@get_cap_state ; PURECAP-SAME: (ptr addrspace(200) noalias nocapture [[STATE:%.*]]) addrspace(200) { ; PURECAP-NEXT: entry: -; PURECAP-NEXT: br label [[FOR_BODY:%.*]] -; PURECAP: for.body: -; PURECAP-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; PURECAP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [25 x ptr addrspace(200)], ptr addrspace(200) @cap, i64 0, i64 [[I_08]] -; PURECAP-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[ARRAYIDX]], align 16 -; PURECAP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_CAPSTATE:%.*]], ptr addrspace(200) [[STATE]], i64 0, i32 0, i64 [[I_08]] -; PURECAP-NEXT: store ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[ARRAYIDX2]], align 16 -; PURECAP-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; PURECAP-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INC]], 25 -; PURECAP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; PURECAP: for.end: +; PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[STATE]], ptr addrspace(200) align 16 @cap, i64 400, i1 false) #[[ATTR2:[0-9]+]] ; PURECAP-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/LoopStrengthReduce/cheriot-strlen.ll b/llvm/test/Transforms/LoopStrengthReduce/cheriot-strlen.ll new file mode 100644 index 0000000000000..ab13dd9e73b17 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/cheriot-strlen.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-n32-S128-pf200:64:64:64:32-A200-P200-G200" +target triple = "riscv32-unknown-unknown" + +;; Ensure that LSR does not create traversal 
starting at a negative initial offset, +;; as those are not representable on CHERIOT. + +; CHECK-LABEL: @strlen +define dso_local cherilibcallcc i32 @strlen(ptr addrspace(200) noundef readonly %str) local_unnamed_addr addrspace(200) #0 { +; CHECK: entry: +; CHECK-NOT: -1 +entry: + br label %for.cond + +; CHECK: for.cond: +for.cond: ; preds = %for.cond, %entry + %s.0 = phi ptr addrspace(200) [ %str, %entry ], [ %incdec.ptr, %for.cond ] + %0 = load i8, ptr addrspace(200) %s.0, align 1, !tbaa !6 + %tobool.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds nuw i8, ptr addrspace(200) %s.0, i32 1 + br i1 %tobool.not, label %for.end, label %for.cond, !llvm.loop !9 + +for.end: ; preds = %for.cond + %1 = tail call i32 @llvm.cheri.cap.diff.i32(ptr addrspace(200) nonnull %s.0, ptr addrspace(200) %str) + ret i32 %1 +} + +; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) +declare i32 @llvm.cheri.cap.diff.i32(ptr addrspace(200), ptr addrspace(200)) addrspace(200) #1 + +attributes #0 = { minsize nofree nosync nounwind optsize memory(argmem: read) "no-builtin-longjmp" "no-builtin-printf" "no-builtin-setjmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cheriot" 
"target-features"="+32bit,+c,+e,+m,+relax,+unaligned-scalar-mem,+xcheri,+xcheriot,+zmmul,-a,-b,-d,-experimental-sdext,-experimental-sdtrig,-experimental-smctr,-experimental-ssctr,-experimental-svukte,-experimental-xqcia,-experimental-xqciac,-experimental-xqcicli,-experimental-xqcicm,-experimental-xqcics,-experimental-xqcicsr,-experimental-xqciint,-experimental-xqcilo,-experimental-xqcilsm,-experimental-xqcisls,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-f,-h,-i,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcheri-norvc,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xmipscmove,-xmipslsp,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zacas,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zicsr,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" } +attributes #1 = { mustprogress 
nofree nosync nounwind willreturn memory(none) } + +!llvm.module.flags = !{!0, !1, !2, !4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 1, !"target-abi", !"cheriotrtos"} +!2 = !{i32 6, !"riscv-isa", !3} +!3 = !{!"rv32e2p0_m2p0_c2p0_zmmul1p0_xcheri0p0_xcheriot1p0"} +!4 = !{i32 8, !"SmallDataLimit", i32 0} +!5 = !{!"clang version 20.1.3 (git@github.com:resistor/llvm-project-1.git bfb9e867619569023263b0c2418ca004603f3fd1)"} +!6 = !{!7, !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C/C++ TBAA"} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.mustprogress"}