-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][fir] Lower do concurrent
loop nests to fir.do_concurrent
#132904
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: Kareem Ergawy (ergawy) ChangesAdds support for lowering To that end, this PR emits the allocations for the iteration variables within the block of the For example, given the following input: do concurrent(i=1:10, j=11:20)
end do the changes in this PR emit the following MLIR: fir.do_concurrent {
%22 = fir.alloca i32 {bindc_name = "i"}
%23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%24 = fir.alloca i32 {bindc_name = "j"}
%25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) {
%26 = fir.convert %arg1 : (index) -> i32
fir.store %26 to %23#<!-- -->0 : !fir.ref<i32>
%27 = fir.convert %arg2 : (index) -> i32
fir.store %27 to %25#<!-- -->0 : !fir.ref<i32>
}
} Patch is 26.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132904.diff 7 Files Affected:
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6e6e88a32517c..622547e32c03f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -130,7 +130,7 @@ struct IncrementLoopInfo {
mlir::Value loopVariable = nullptr;
// Data members for structured loops.
- fir::DoLoopOp doLoop = nullptr;
+ mlir::Operation *loopOp = nullptr;
// Data members for unstructured loops.
bool hasRealControl = false;
@@ -2267,8 +2267,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
/*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
- if (has_attrs)
- info.doLoop.setLoopAnnotationAttr(la);
+ if (has_attrs) {
+ if (auto loopOp = mlir::dyn_cast<fir::DoLoopOp>(info.loopOp))
+ loopOp.setLoopAnnotationAttr(la);
+
+ if (auto doConcurrentOp =
+ mlir::dyn_cast<fir::DoConcurrentLoopOp>(info.loopOp))
+ doConcurrentOp.setLoopAnnotationAttr(la);
+ }
}
/// Generate FIR to begin a structured or unstructured increment loop nest.
@@ -2277,96 +2283,77 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
- mlir::Operation *boundsAndStepIP = nullptr;
mlir::arith::IntegerOverflowFlags iofBackup{};
- for (IncrementLoopInfo &info : incrementLoopNestInfo) {
- mlir::Value lowerValue;
- mlir::Value upperValue;
- mlir::Value stepValue;
-
- {
- mlir::OpBuilder::InsertionGuard guard(*builder);
+ llvm::SmallVector<mlir::Value> nestLBs;
+ llvm::SmallVector<mlir::Value> nestUBs;
+ llvm::SmallVector<mlir::Value> nestSts;
+ llvm::SmallVector<mlir::Value> nestReduceOperands;
+ llvm::SmallVector<mlir::Attribute> nestReduceAttrs;
+ bool genDoConcurrent = false;
- // Set the IP before the first loop in the nest so that all nest bounds
- // and step values are created outside the nest.
- if (boundsAndStepIP)
- builder->setInsertionPointAfter(boundsAndStepIP);
+ for (IncrementLoopInfo &info : incrementLoopNestInfo) {
+ genDoConcurrent = info.isStructured() && info.isUnordered;
+ if (!genDoConcurrent)
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isUnordered);
- if (!getLoweringOptions().getIntegerWrapAround()) {
- iofBackup = builder->getIntegerOverflowFlags();
- builder->setIntegerOverflowFlags(
- mlir::arith::IntegerOverflowFlags::nsw);
- }
- lowerValue = genControlValue(info.lowerExpr, info);
- upperValue = genControlValue(info.upperExpr, info);
- bool isConst = true;
- stepValue = genControlValue(info.stepExpr, info,
- info.isStructured() ? nullptr : &isConst);
- if (!getLoweringOptions().getIntegerWrapAround())
- builder->setIntegerOverflowFlags(iofBackup);
- boundsAndStepIP = stepValue.getDefiningOp();
-
- // Use a temp variable for unstructured loops with non-const step.
- if (!isConst) {
- info.stepVariable =
- builder->createTemporary(loc, stepValue.getType());
- boundsAndStepIP =
- builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+
+ if (!getLoweringOptions().getIntegerWrapAround()) {
+ iofBackup = builder->getIntegerOverflowFlags();
+ builder->setIntegerOverflowFlags(
+ mlir::arith::IntegerOverflowFlags::nsw);
+ }
+
+ nestLBs.push_back(genControlValue(info.lowerExpr, info));
+ nestUBs.push_back(genControlValue(info.upperExpr, info));
+ bool isConst = true;
+ nestSts.push_back(genControlValue(
+ info.stepExpr, info, info.isStructured() ? nullptr : &isConst));
+
+ if (!getLoweringOptions().getIntegerWrapAround())
+ builder->setIntegerOverflowFlags(iofBackup);
+
+ // Use a temp variable for unstructured loops with non-const step.
+ if (!isConst) {
+ mlir::Value stepValue = nestSts.back();
+ info.stepVariable = builder->createTemporary(loc, stepValue.getType());
+ builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+ }
+
+ if (genDoConcurrent && nestReduceOperands.empty()) {
+ // Create DO CONCURRENT reduce operands and attributes
+ for (const auto &reduceSym : info.reduceSymList) {
+ const fir::ReduceOperationEnum reduceOperation = reduceSym.first;
+ const Fortran::semantics::Symbol *sym = reduceSym.second;
+ fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
+ nestReduceOperands.push_back(fir::getBase(exv));
+ auto reduceAttr =
+ fir::ReduceAttr::get(builder->getContext(), reduceOperation);
+ nestReduceAttrs.push_back(reduceAttr);
}
}
+ }
+ for (auto [info, lowerValue, upperValue, stepValue] :
+ llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) {
// Structured loop - generate fir.do_loop.
if (info.isStructured()) {
+ if (info.isUnordered)
+ continue;
+
+ // The loop variable is a doLoop op argument.
mlir::Type loopVarType = info.getLoopVariableType();
- mlir::Value loopValue;
- if (info.isUnordered) {
- llvm::SmallVector<mlir::Value> reduceOperands;
- llvm::SmallVector<mlir::Attribute> reduceAttrs;
- // Create DO CONCURRENT reduce operands and attributes
- for (const auto &reduceSym : info.reduceSymList) {
- const fir::ReduceOperationEnum reduce_operation = reduceSym.first;
- const Fortran::semantics::Symbol *sym = reduceSym.second;
- fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
- reduceOperands.push_back(fir::getBase(exv));
- auto reduce_attr =
- fir::ReduceAttr::get(builder->getContext(), reduce_operation);
- reduceAttrs.push_back(reduce_attr);
- }
- // The loop variable value is explicitly updated.
- info.doLoop = builder->create<fir::DoLoopOp>(
- loc, lowerValue, upperValue, stepValue, /*unordered=*/true,
- /*finalCountValue=*/false, /*iterArgs=*/std::nullopt,
- llvm::ArrayRef<mlir::Value>(reduceOperands), reduceAttrs);
- builder->setInsertionPointToStart(info.doLoop.getBody());
- loopValue = builder->createConvert(loc, loopVarType,
- info.doLoop.getInductionVar());
- } else {
- // The loop variable is a doLoop op argument.
- info.doLoop = builder->create<fir::DoLoopOp>(
- loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
- /*finalCountValue=*/true,
- builder->createConvert(loc, loopVarType, lowerValue));
- builder->setInsertionPointToStart(info.doLoop.getBody());
- loopValue = info.doLoop.getRegionIterArgs()[0];
- }
+ auto loopOp = builder->create<fir::DoLoopOp>(
+ loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
+ /*finalCountValue=*/true,
+ builder->createConvert(loc, loopVarType, lowerValue));
+ info.loopOp = loopOp;
+ builder->setInsertionPointToStart(loopOp.getBody());
+ mlir::Value loopValue = loopOp.getRegionIterArgs()[0];
+
// Update the loop variable value in case it has non-index references.
builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
- if (info.maskExpr) {
- Fortran::lower::StatementContext stmtCtx;
- mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
- stmtCtx.finalizeAndReset();
- mlir::Value maskCondCast =
- builder->createConvert(loc, builder->getI1Type(), maskCond);
- auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
- /*withElseRegion=*/false);
- builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
- }
- if (info.hasLocalitySpecs())
- handleLocalitySpecs(info);
-
addLoopAnnotationAttr(info, dirs);
continue;
}
@@ -2430,6 +2417,60 @@ class FirConverter : public Fortran::lower::AbstractConverter {
builder->restoreInsertionPoint(insertPt);
}
}
+
+ if (genDoConcurrent) {
+ auto loopWrapperOp = builder->create<fir::DoConcurrentOp>(loc);
+ builder->setInsertionPointToStart(
+ builder->createBlock(&loopWrapperOp.getRegion()));
+
+ for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) {
+ info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
+ info.isUnordered);
+ }
+
+ builder->setInsertionPointToEnd(loopWrapperOp.getBody());
+ auto loopOp = builder->create<fir::DoConcurrentLoopOp>(
+ loc, nestLBs, nestUBs, nestSts, nestReduceOperands,
+ nestReduceAttrs.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs),
+ nullptr);
+
+ llvm::SmallVector<mlir::Type> loopBlockArgTypes(
+ incrementLoopNestInfo.size(), builder->getIndexType());
+ llvm::SmallVector<mlir::Location> loopBlockArgLocs(
+ incrementLoopNestInfo.size(), loc);
+ mlir::Region &loopRegion = loopOp.getRegion();
+ mlir::Block *loopBlock = builder->createBlock(
+ &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs);
+ builder->setInsertionPointToStart(loopBlock);
+
+ for (auto [info, blockArg] :
+ llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) {
+ info.loopOp = loopOp;
+ mlir::Value loopValue =
+ builder->createConvert(loc, info.getLoopVariableType(), blockArg);
+ builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
+
+ if (info.maskExpr) {
+ Fortran::lower::StatementContext stmtCtx;
+ mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
+ stmtCtx.finalizeAndReset();
+ mlir::Value maskCondCast =
+ builder->createConvert(loc, builder->getI1Type(), maskCond);
+ auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
+ /*withElseRegion=*/false);
+ builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
+ }
+ }
+
+ IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back();
+
+ if (innermostInfo.hasLocalitySpecs())
+ handleLocalitySpecs(innermostInfo);
+
+ addLoopAnnotationAttr(innermostInfo, dirs);
+ }
}
/// Generate FIR to end a structured or unstructured increment loop nest.
@@ -2446,29 +2487,31 @@ class FirConverter : public Fortran::lower::AbstractConverter {
it != rend; ++it) {
IncrementLoopInfo &info = *it;
if (info.isStructured()) {
- // End fir.do_loop.
+ // End fir.do_concurent.loop.
if (info.isUnordered) {
- builder->setInsertionPointAfter(info.doLoop);
+ builder->setInsertionPointAfter(info.loopOp->getParentOp());
continue;
}
+
+ // End fir.do_loop.
// Decrement tripVariable.
- builder->setInsertionPointToEnd(info.doLoop.getBody());
+ auto doLoopOp = mlir::cast<fir::DoLoopOp>(info.loopOp);
+ builder->setInsertionPointToEnd(doLoopOp.getBody());
llvm::SmallVector<mlir::Value, 2> results;
results.push_back(builder->create<mlir::arith::AddIOp>(
- loc, info.doLoop.getInductionVar(), info.doLoop.getStep(),
- iofAttr));
+ loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr));
// Step loopVariable to help optimizations such as vectorization.
// Induction variable elimination will clean up as necessary.
mlir::Value step = builder->createConvert(
- loc, info.getLoopVariableType(), info.doLoop.getStep());
+ loc, info.getLoopVariableType(), doLoopOp.getStep());
mlir::Value loopVar =
builder->create<fir::LoadOp>(loc, info.loopVariable);
results.push_back(
builder->create<mlir::arith::AddIOp>(loc, loopVar, step, iofAttr));
builder->create<fir::ResultOp>(loc, results);
- builder->setInsertionPointAfter(info.doLoop);
+ builder->setInsertionPointAfter(doLoopOp);
// The loop control variable may be used after the loop.
- builder->create<fir::StoreOp>(loc, info.doLoop.getResult(1),
+ builder->create<fir::StoreOp>(loc, doLoopOp.getResult(1),
info.loopVariable);
continue;
}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b7f8a8d3a9d56..8e7f47a8f2a9b 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -279,6 +279,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
if (auto cufKernelOp = getRegion().getParentOfType<cuf::KernelOp>())
return &cufKernelOp.getRegion().front();
+ if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
+ return doConcurentOp.getBody();
+
return getEntryBlock();
}
diff --git a/flang/test/Lower/do_concurrent.f90 b/flang/test/Lower/do_concurrent.f90
index ef93d2d6b035b..cc113f59c35e3 100644
--- a/flang/test/Lower/do_concurrent.f90
+++ b/flang/test/Lower/do_concurrent.f90
@@ -14,6 +14,9 @@ subroutine sub1(n)
implicit none
integer :: n, m, i, j, k
integer, dimension(n) :: a
+!CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFsub1En"}
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Ea"}
+
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
@@ -29,10 +32,30 @@ subroutine sub1(n)
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
-!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
-!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i"}
+!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]]
+!CHECK: %[[J:.*]] = fir.alloca i32 {bindc_name = "j"}
+!CHECK: %[[J_DECL:.*]]:2 = hlfir.declare %[[J]]
+!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k"}
+!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]]
+
+!CHECK: fir.do_concurrent.loop (%[[I_IV:.*]], %[[J_IV:.*]], %[[K_IV:.*]]) =
+!CHECK-SAME: (%[[LB1_CVT]], %[[LB2_CVT]], %[[LB3_CVT]]) to
+!CHECK-SAME: (%[[UB1_CVT]], %[[UB2_CVT]], %[[UB3_CVT]]) step
+!CHECK-SAME: (%{{.*}}, %{{.*}}, %{{.*}}) {
+!CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32
+!CHECK: fir.store %[[I_IV_CVT]] to %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32
+!CHECK: fir.store %[[J_IV_CVT]] to %[[J_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[K_IV_CVT:.*]] = fir.convert %[[K_IV]] : (index) -> i32
+!CHECK: fir.store %[[K_IV_CVT]] to %[[K_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_VAL:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_VAL_CVT:.*]] = fir.convert %[[I_VAL]] : (i32) -> i64
+!CHECK: %[[A_ELEM:.*]] = hlfir.designate %[[A_DECL]]#0 (%[[I_VAL_CVT]])
+!CHECK: hlfir.assign %[[N_VAL]] to %[[A_ELEM]] : i32, !fir.ref<i32>
do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
a(i) = n
end do
@@ -45,14 +68,17 @@ subroutine sub2(n)
integer, dimension(n) :: a
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
-!CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32>
+!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB1_CVT]]) to (%[[UB1_CVT]]) step (%{{.*}})
+
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB2_CVT]]) to (%[[UB2_CVT]]) step (%{{.*}})
do concurrent(i=1:n)
do concurrent(j=1:bar(n*m, n/m))
a(i) = n
@@ -60,7 +86,6 @@ subroutine sub2(n)
end do
end subroutine
-
!CHECK-LABEL: unstructured
subroutine unstructured(inner_step)
integer(4) :: i, j, inner_step
diff --git a/flang/test/Lower/do_concurrent_local_default_init.f90 b/flang/test/Lower/do_concurrent_local_default_init.f90
index 7652e4fcd0402..207704ac1a990 100644
--- a/flang/test/Lower/do_concurrent_local_default_init.f90
+++ b/flang/test/Lower/do_concurrent_local_default_init.f90
@@ -29,7 +29,7 @@ subroutine test_default_init()
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>> {fir.bindc_name = "p"}) {
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>
! CHECK: %[[VAL_7:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>) -> index
-! CHECK: fir.do_loop
+! CHECK: fir.do_concurrent.loop
! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>> {bindc_name = "p", pinned, uniq_name = "_QFtest_ptrEp"}
! CHECK: %[[VAL_17:.*]] = fir.zero_bits !fir.ptr<!fir.array<?x!fir.char<1,?>>>
! CHECK: %[[VAL_18:.*]] = arith.constant 0 : index
@@ -43,7 +43,7 @@ subroutine test_default_init()
! CHECK: }
! CHECK-LABEL: func.func @_QPtest_default_init(
-! CHECK: fir.do_loop
+! CHECK: fir.do_concurrent.loop
! CHECK: %[[VAL_26:.*]] = fir.alloca !fir.type<_QFtest_default_initTt{i:i32}> {bindc_name = "a", pinned, uniq_name = "_QFtest_default_initEa"}
! CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_26]] : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>
! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<none>
diff --git a/flang/test/Lower/loops.f90 b/flang/test/Lower/loops.f90
index ea65ba3e4d66d..60df27a591dc3 100644
--- a/flang/test/Lower/loops.f90
+++ b/flang/test/Lower/loops.f90
@@ -2,15 +2,6 @@
! CHECK-LABEL: loop_test
subroutine loop_test
- ! CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i"}
- ! CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i"}
- ! CHECK: %[[VAL_4:.*]] = fir.alloca i16 {bindc_name = "i"}
- ! CHECK: %[[VAL_5:.*]] = fir.alloca i8 {bindc_name = "k"}
- ! CHECK: %[[VAL_6:.*]] = fir.alloca i8 {bindc_name = "j"}
- ! CHECK: %[[VAL_7:.*]] = fir.alloca i8 {bindc_name = "i"}
- ! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "k"}
- ! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "j"}
- ! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i"}
! CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<5x5x5xi32> {bindc_name = "a", uniq_name = "_QFloop_testEa"}
! CHECK...
[truncated]
|
c85c0ef
to
421e550
Compare
421e550
to
252093c
Compare
Please have a look when you have time 🙏! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for working on this Kareem. Just a few questions.
Could this be made any simpler by using hlfir::genLoopNest
? No worries if it isn't a good fit.
flang/lib/Lower/Bridge.cpp
Outdated
if (boundsAndStepIP) | ||
builder->setInsertionPointAfter(boundsAndStepIP); | ||
for (IncrementLoopInfo &info : incrementLoopNestInfo) { | ||
genDoConcurrent = info.isStructured() && info.isUnordered; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The isUnordered
field has a comment saying that it is also used for FORALL
(although that may not still be true in the HLFIR lowering path). I think it would be better to have a field that specifically says this is supposed to be a do concurrent
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isUnordered
is also used for array syntax.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I might be missing something, but from the code, it seems like IncrementLoopNestInfo::isUnordered
is set to true
only for do concurrent
loops (in getConcurrentControl
). So at this stage, it might be safe to just rename it to isDoConcurrent
, I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renaming is fine for me. That will make it clear this is the only use and prevent future conflation of uses.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable); | ||
} | ||
|
||
if (genDoConcurrent && nestReduceOperands.empty()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is the check for nestReduceOperands.empty()
needed here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because getConcurrentControl
(https://github.com/llvm/llvm-project/blob/main/flang/lib/Lower/Bridge.cpp#L1998) adds reductions to all IncrementLoopInfo
instances of the nest.
But if people more familiar with this part think it is a good idea, we can just add the reductions to the last IncrementLoopInfo
instance like other locality specifiers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for explaining. I am not particularly familiar with this bit of code but only adding them to the last IncrementLoopInfo
sounds okay to me.
flang/lib/Lower/Bridge.cpp
Outdated
// Structured loop - generate fir.do_loop. | ||
if (info.isStructured()) { | ||
if (info.isUnordered) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be clearer to check here for genDoConcurrent
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>()) | ||
return doConcurentOp.getBody(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this for any allocation in the do concurrent? We did not have this so far so I'm just wondering.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do it this way for the new fir.do_concurrent
and fir.do_concurrent.loop
ops to keep all info related to the loop nest closely located in the IR. The allocations needed here are for the nest iteration variables. See: https://discourse.llvm.org/t/modeling-do-concurrent-loops-in-the-fir-dialect/84950/6?u=bob.belcher and related discussion and https://github.com/llvm/llvm-project/blob/main/flang/include/flang/Optimizer/Dialect/FIROps.td#L3457.
Adds support for lowering `do concurrent` nests from PFT to the new `fir.do_concurrent` MLIR op as well as its special terminator `fir.do_concurrent.loop` which models the actual loop nest. To that end, this PR emits the allocations for the iteration variables within the block of the `fir.do_concurrent` op and creates a region for the `fir.do_concurrent.loop` op that accepts arguments equal in number to the number of the input `do concurrent` iteration ranges. For example, given the following input: ```fortran do concurrent(i=1:10, j=11:20) end do ``` the changes in this PR emit the following MLIR: ```mlir fir.do_concurrent { %22 = fir.alloca i32 {bindc_name = "i"} %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) %24 = fir.alloca i32 {bindc_name = "j"} %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { %26 = fir.convert %arg1 : (index) -> i32 fir.store %26 to %23#0 : !fir.ref<i32> %27 = fir.convert %arg2 : (index) -> i32 fir.store %27 to %25#0 : !fir.ref<i32> } } ```
I didn't know about this util, thanks for pointing it out. I think it won't work out of the box since |
252093c
to
4cefbea
Compare
58bcb4b
to
2122290
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM once @clementval is happy
@@ -97,7 +97,8 @@ struct IncrementLoopInfo { | |||
bool isUnordered = false) | |||
: loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)}, | |||
upperExpr{Fortran::semantics::GetExpr(upper)}, | |||
stepExpr{Fortran::semantics::GetExpr(step)}, isUnordered{isUnordered} {} | |||
stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isUnordered} { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit
stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isUnordered} { | |
stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isConcurrent} { |
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable); | ||
} | ||
|
||
if (genDoConcurrent && nestReduceOperands.empty()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for explaining. I am not particularly familiar with this bit of code but only adding them to the last IncrementLoopInfo
sounds okay to me.
Adds support for lowering
do concurrent
nests from PFT to the newfir.do_concurrent
MLIR op as well as its special terminatorfir.do_concurrent.loop
which models the actual loop nest.To that end, this PR emits the allocations for the iteration variables within the block of the
fir.do_concurrent
op and creates a region for thefir.do_concurrent.loop
op that accepts arguments equal in number to the number of the inputdo concurrent
iteration ranges.For example, given the following input:
the changes in this PR emit the following MLIR: