-
Notifications
You must be signed in to change notification settings - Fork 14k
[LSR] Make canHoistIVInc allow non-integer types #143707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
canHoistIVInc was made to only allow integer types, to avoid a crash in isIndexedLoadLegal/isIndexedStoreLegal caused by failing an assertion in getValueType (or rather in MVT::getVT, which gets called from it) when passed a struct type. Adjusting these functions to pass AllowUnknown=true to getValueType means we don't get an assertion failure (MVT::Other is returned, for which TLI->isIndexedLoadLegal should then return false), meaning we can remove the check for integer types.
@llvm/pr-subscribers-llvm-transforms Author: John Brawn (john-brawn-arm)

Changes: canHoistIVInc was made to only allow integer types, to avoid a crash in isIndexedLoadLegal/isIndexedStoreLegal caused by failing an assertion in getValueType (or rather in MVT::getVT, which gets called from it) when passed a struct type. Adjusting these functions to pass AllowUnknown=true to getValueType means we don't get an assertion failure (MVT::Other is returned, for which TLI->isIndexedLoadLegal should then return false), meaning we can remove the check for integer types. Full diff: https://github.com/llvm/llvm-project/pull/143707.diff 3 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 574152e254f15..3b87978fe3fab 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -478,12 +478,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty) const override {
- EVT VT = getTLI()->getValueType(DL, Ty);
+ EVT VT = getTLI()->getValueType(DL, Ty, /*AllowUnknown=*/true);
return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
}
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty) const override {
- EVT VT = getTLI()->getValueType(DL, Ty);
+ EVT VT = getTLI()->getValueType(DL, Ty, /*AllowUnknown=*/true);
return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 242e571c072af..e4f35e4b2108b 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6008,9 +6008,8 @@ static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
Instruction *I = Fixup.UserInst;
Type *Ty = I->getType();
- return Ty->isIntegerTy() &&
- ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
- (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
+ return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
+ (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty));
}
/// Rewrite all the fixup locations with new values, following the chosen
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/postidx-load.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/postidx-load.ll
new file mode 100644
index 0000000000000..5976658ccdf86
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/postidx-load.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-none-elf | FileCheck %s
+
+; Check that the load in the loop has postindex addressing, regardless of the
+; type or whether the input uses postindex or offset addressing.
+
+define i32 @i32_initially_postidx(ptr %p, i64 %n) {
+; CHECK-LABEL: i32_initially_postidx:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: b.lt .LBB0_5
+; CHECK-NEXT: // %bb.1: // %for.body.preheader
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: .LBB0_2: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr w9, [x0], #4
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: b.lo .LBB0_5
+; CHECK-NEXT: // %bb.3: // %for.inc
+; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: subs x1, x1, #1
+; CHECK-NEXT: b.ne .LBB0_2
+; CHECK-NEXT: // %bb.4: // %cleanup
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %cleanup
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
+ %accum = phi i32 [ %add, %for.inc ], [ 0, %entry ]
+ %ptr = phi ptr [ %ptr.next, %for.inc ], [ %p, %entry ]
+ %val = load i32, ptr %ptr, align 4
+ %ptr.next = getelementptr inbounds nuw i8, ptr %ptr, i64 4
+ %add = add i32 %accum, %val
+ %cmp2 = icmp ult i32 %add, 0
+ br i1 %cmp2, label %cleanup, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %cleanup, label %for.body
+
+cleanup:
+ %ret = phi i32 [ 0, %entry ], [ 0, %for.body ], [ %add, %for.inc ]
+ ret i32 %ret
+}
+
+define i32 @i32_initially_offset(ptr %p, i64 %n) {
+; CHECK-LABEL: i32_initially_offset:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: b.lt .LBB1_5
+; CHECK-NEXT: // %bb.1: // %for.body.preheader
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: .LBB1_2: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr w9, [x0], #4
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: b.lo .LBB1_5
+; CHECK-NEXT: // %bb.3: // %for.cond
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: subs x1, x1, #1
+; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: // %bb.4: // %cleanup
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %cleanup
+
+for.cond:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %cleanup, label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.cond ], [ 0, %entry ]
+ %accum = phi i32 [ %add, %for.cond ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv
+ %val = load i32, ptr %arrayidx, align 4
+ %add = add i32 %accum, %val
+ %cmp2 = icmp ult i32 %add, 0
+ br i1 %cmp2, label %cleanup, label %for.cond
+
+cleanup:
+ %ret = phi i32 [ 0, %entry ], [ 0, %for.body ], [ %add, %for.cond ]
+ ret i32 %ret
+}
+
+define float @float_initially_postidx(ptr %p, i64 %n) {
+; CHECK-LABEL: float_initially_postidx:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: b.lt .LBB2_3
+; CHECK-NEXT: .LBB2_1: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr s1, [x0], #4
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: b.mi .LBB2_4
+; CHECK-NEXT: // %bb.2: // %for.inc
+; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT: subs x1, x1, #1
+; CHECK-NEXT: b.ne .LBB2_1
+; CHECK-NEXT: .LBB2_3: // %cleanup
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %cleanup
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
+ %accum = phi float [ %add, %for.inc ], [ 0.000000e+00, %entry ]
+ %ptr = phi ptr [ %ptr.next, %for.inc ], [ %p, %entry ]
+ %val = load float, ptr %ptr, align 4
+ %ptr.next = getelementptr inbounds nuw i8, ptr %ptr, i64 4
+ %add = fadd float %accum, %val
+ %cmp2 = fcmp olt float %add, 0.000000e+00
+ br i1 %cmp2, label %cleanup, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %cleanup, label %for.body
+
+cleanup:
+ %ret = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %for.body ], [ %add, %for.inc ]
+ ret float %ret
+}
+
+define float @float_initially_offset(ptr %p, i64 %n) {
+; CHECK-LABEL: float_initially_offset:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: b.lt .LBB3_3
+; CHECK-NEXT: .LBB3_1: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr s1, [x0], #4
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: b.mi .LBB3_4
+; CHECK-NEXT: // %bb.2: // %for.cond
+; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT: subs x1, x1, #1
+; CHECK-NEXT: b.ne .LBB3_1
+; CHECK-NEXT: .LBB3_3: // %cleanup
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %cleanup
+
+for.cond:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %cleanup, label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.cond ], [ 0, %entry ]
+ %accum = phi float [ %add, %for.cond ], [ 0.000000e+00, %entry ]
+ %arrayidx = getelementptr inbounds nuw float, ptr %p, i64 %iv
+ %val = load float, ptr %arrayidx, align 4
+ %add = fadd float %accum, %val
+ %cmp2 = fcmp olt float %add, 0.000000e+00
+ br i1 %cmp2, label %cleanup, label %for.cond
+
+cleanup:
+ %ret = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %for.body ], [ %add, %for.cond ]
+ ret float %ret
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
canHoistIVInc was made to only allow integer types, to avoid a crash in isIndexedLoadLegal/isIndexedStoreLegal caused by failing an assertion in getValueType (or rather in MVT::getVT, which gets called from it) when passed a struct type. Adjusting these functions to pass AllowUnknown=true to getValueType means we don't get an assertion failure (MVT::Other is returned, for which TLI->isIndexedLoadLegal should then return false), meaning we can remove the check for integer types.