[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` #132904

ergawy · 2025-03-25T09:27:26Z

Adds support for lowering do concurrent nests from PFT to the new fir.do_concurrent MLIR op as well as its special terminator fir.do_concurrent.loop which models the actual loop nest.

To that end, this PR emits the allocations for the iteration variables within the block of the fir.do_concurrent op and creates a region for the fir.do_concurrent.loop op that accepts arguments equal in number to the number of the input do concurrent iteration ranges.

For example, given the following input:

   do concurrent(i=1:10, j=11:20)
   end do

the changes in this PR emit the following MLIR:

    fir.do_concurrent {
      %22 = fir.alloca i32 {bindc_name = "i"}
      %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
      %24 = fir.alloca i32 {bindc_name = "j"}
      %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
      fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) {
        %26 = fir.convert %arg1 : (index) -> i32
        fir.store %26 to %23#0 : !fir.ref<i32>
        %27 = fir.convert %arg2 : (index) -> i32
        fir.store %27 to %25#0 : !fir.ref<i32>
      }
    }

llvmbot · 2025-03-25T09:27:57Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: Kareem Ergawy (ergawy)

Changes

Adds support for lowering do concurrent nests from PFT to the new fir.do_concurrent MLIR op as well as its special terminator fir.do_concurrent.loop which models the actual loop nest.

To that end, this PR emits the allocations for the iteration variables within the block of the fir.do_concurrent op and creates a region for the fir.do_concurrent.loop op that accepts arguments equal in number to the number of the input do concurrent iteration ranges.

For example, given the following input:

   do concurrent(i=1:10, j=11:20)
   end do

the changes in this PR emit the following MLIR:

    fir.do_concurrent {
      %22 = fir.alloca i32 {bindc_name = "i"}
      %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref&lt;i32&gt;) -&gt; (!fir.ref&lt;i32&gt;, !fir.ref&lt;i32&gt;)
      %24 = fir.alloca i32 {bindc_name = "j"}
      %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref&lt;i32&gt;) -&gt; (!fir.ref&lt;i32&gt;, !fir.ref&lt;i32&gt;)
      fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) {
        %26 = fir.convert %arg1 : (index) -&gt; i32
        fir.store %26 to %23#<!-- -->0 : !fir.ref&lt;i32&gt;
        %27 = fir.convert %arg2 : (index) -&gt; i32
        fir.store %27 to %25#<!-- -->0 : !fir.ref&lt;i32&gt;
      }
    }

Patch is 26.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132904.diff

7 Files Affected:

(modified) flang/lib/Lower/Bridge.cpp (+130-87)
(modified) flang/lib/Optimizer/Builder/FIRBuilder.cpp (+3)
(modified) flang/test/Lower/do_concurrent.f90 (+32-7)
(modified) flang/test/Lower/do_concurrent_local_default_init.f90 (+2-2)
(modified) flang/test/Lower/loops.f90 (+13-24)
(modified) flang/test/Lower/loops3.f90 (+1-3)
(modified) flang/test/Lower/nsw.f90 (+3-2)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6e6e88a32517c..622547e32c03f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -130,7 +130,7 @@ struct IncrementLoopInfo {
   mlir::Value loopVariable = nullptr;
 
   // Data members for structured loops.
-  fir::DoLoopOp doLoop = nullptr;
+  mlir::Operation *loopOp = nullptr;
 
   // Data members for unstructured loops.
   bool hasRealControl = false;
@@ -2267,8 +2267,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
         builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
         /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
-    if (has_attrs)
-      info.doLoop.setLoopAnnotationAttr(la);
+    if (has_attrs) {
+      if (auto loopOp = mlir::dyn_cast<fir::DoLoopOp>(info.loopOp))
+        loopOp.setLoopAnnotationAttr(la);
+
+      if (auto doConcurrentOp =
+              mlir::dyn_cast<fir::DoConcurrentLoopOp>(info.loopOp))
+        doConcurrentOp.setLoopAnnotationAttr(la);
+    }
   }
 
   /// Generate FIR to begin a structured or unstructured increment loop nest.
@@ -2277,96 +2283,77 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
     assert(!incrementLoopNestInfo.empty() && "empty loop nest");
     mlir::Location loc = toLocation();
-    mlir::Operation *boundsAndStepIP = nullptr;
     mlir::arith::IntegerOverflowFlags iofBackup{};
 
-    for (IncrementLoopInfo &info : incrementLoopNestInfo) {
-      mlir::Value lowerValue;
-      mlir::Value upperValue;
-      mlir::Value stepValue;
-
-      {
-        mlir::OpBuilder::InsertionGuard guard(*builder);
+    llvm::SmallVector<mlir::Value> nestLBs;
+    llvm::SmallVector<mlir::Value> nestUBs;
+    llvm::SmallVector<mlir::Value> nestSts;
+    llvm::SmallVector<mlir::Value> nestReduceOperands;
+    llvm::SmallVector<mlir::Attribute> nestReduceAttrs;
+    bool genDoConcurrent = false;
 
-        // Set the IP before the first loop in the nest so that all nest bounds
-        // and step values are created outside the nest.
-        if (boundsAndStepIP)
-          builder->setInsertionPointAfter(boundsAndStepIP);
+    for (IncrementLoopInfo &info : incrementLoopNestInfo) {
+      genDoConcurrent = info.isStructured() && info.isUnordered;
 
+      if (!genDoConcurrent)
         info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
                                                    info.isUnordered);
-        if (!getLoweringOptions().getIntegerWrapAround()) {
-          iofBackup = builder->getIntegerOverflowFlags();
-          builder->setIntegerOverflowFlags(
-              mlir::arith::IntegerOverflowFlags::nsw);
-        }
-        lowerValue = genControlValue(info.lowerExpr, info);
-        upperValue = genControlValue(info.upperExpr, info);
-        bool isConst = true;
-        stepValue = genControlValue(info.stepExpr, info,
-                                    info.isStructured() ? nullptr : &isConst);
-        if (!getLoweringOptions().getIntegerWrapAround())
-          builder->setIntegerOverflowFlags(iofBackup);
-        boundsAndStepIP = stepValue.getDefiningOp();
-
-        // Use a temp variable for unstructured loops with non-const step.
-        if (!isConst) {
-          info.stepVariable =
-              builder->createTemporary(loc, stepValue.getType());
-          boundsAndStepIP =
-              builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+
+      if (!getLoweringOptions().getIntegerWrapAround()) {
+        iofBackup = builder->getIntegerOverflowFlags();
+        builder->setIntegerOverflowFlags(
+            mlir::arith::IntegerOverflowFlags::nsw);
+      }
+
+      nestLBs.push_back(genControlValue(info.lowerExpr, info));
+      nestUBs.push_back(genControlValue(info.upperExpr, info));
+      bool isConst = true;
+      nestSts.push_back(genControlValue(
+          info.stepExpr, info, info.isStructured() ? nullptr : &isConst));
+
+      if (!getLoweringOptions().getIntegerWrapAround())
+        builder->setIntegerOverflowFlags(iofBackup);
+
+      // Use a temp variable for unstructured loops with non-const step.
+      if (!isConst) {
+        mlir::Value stepValue = nestSts.back();
+        info.stepVariable = builder->createTemporary(loc, stepValue.getType());
+        builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+      }
+
+      if (genDoConcurrent && nestReduceOperands.empty()) {
+        // Create DO CONCURRENT reduce operands and attributes
+        for (const auto &reduceSym : info.reduceSymList) {
+          const fir::ReduceOperationEnum reduceOperation = reduceSym.first;
+          const Fortran::semantics::Symbol *sym = reduceSym.second;
+          fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
+          nestReduceOperands.push_back(fir::getBase(exv));
+          auto reduceAttr =
+              fir::ReduceAttr::get(builder->getContext(), reduceOperation);
+          nestReduceAttrs.push_back(reduceAttr);
         }
       }
+    }
 
+    for (auto [info, lowerValue, upperValue, stepValue] :
+         llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) {
       // Structured loop - generate fir.do_loop.
       if (info.isStructured()) {
+        if (info.isUnordered)
+          continue;
+
+        // The loop variable is a doLoop op argument.
         mlir::Type loopVarType = info.getLoopVariableType();
-        mlir::Value loopValue;
-        if (info.isUnordered) {
-          llvm::SmallVector<mlir::Value> reduceOperands;
-          llvm::SmallVector<mlir::Attribute> reduceAttrs;
-          // Create DO CONCURRENT reduce operands and attributes
-          for (const auto &reduceSym : info.reduceSymList) {
-            const fir::ReduceOperationEnum reduce_operation = reduceSym.first;
-            const Fortran::semantics::Symbol *sym = reduceSym.second;
-            fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
-            reduceOperands.push_back(fir::getBase(exv));
-            auto reduce_attr =
-                fir::ReduceAttr::get(builder->getContext(), reduce_operation);
-            reduceAttrs.push_back(reduce_attr);
-          }
-          // The loop variable value is explicitly updated.
-          info.doLoop = builder->create<fir::DoLoopOp>(
-              loc, lowerValue, upperValue, stepValue, /*unordered=*/true,
-              /*finalCountValue=*/false, /*iterArgs=*/std::nullopt,
-              llvm::ArrayRef<mlir::Value>(reduceOperands), reduceAttrs);
-          builder->setInsertionPointToStart(info.doLoop.getBody());
-          loopValue = builder->createConvert(loc, loopVarType,
-                                             info.doLoop.getInductionVar());
-        } else {
-          // The loop variable is a doLoop op argument.
-          info.doLoop = builder->create<fir::DoLoopOp>(
-              loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
-              /*finalCountValue=*/true,
-              builder->createConvert(loc, loopVarType, lowerValue));
-          builder->setInsertionPointToStart(info.doLoop.getBody());
-          loopValue = info.doLoop.getRegionIterArgs()[0];
-        }
+        auto loopOp = builder->create<fir::DoLoopOp>(
+            loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
+            /*finalCountValue=*/true,
+            builder->createConvert(loc, loopVarType, lowerValue));
+        info.loopOp = loopOp;
+        builder->setInsertionPointToStart(loopOp.getBody());
+        mlir::Value loopValue = loopOp.getRegionIterArgs()[0];
+
         // Update the loop variable value in case it has non-index references.
         builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
-        if (info.maskExpr) {
-          Fortran::lower::StatementContext stmtCtx;
-          mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
-          stmtCtx.finalizeAndReset();
-          mlir::Value maskCondCast =
-              builder->createConvert(loc, builder->getI1Type(), maskCond);
-          auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
-                                                 /*withElseRegion=*/false);
-          builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
-        }
-        if (info.hasLocalitySpecs())
-          handleLocalitySpecs(info);
-
         addLoopAnnotationAttr(info, dirs);
         continue;
       }
@@ -2430,6 +2417,60 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         builder->restoreInsertionPoint(insertPt);
       }
     }
+
+    if (genDoConcurrent) {
+      auto loopWrapperOp = builder->create<fir::DoConcurrentOp>(loc);
+      builder->setInsertionPointToStart(
+          builder->createBlock(&loopWrapperOp.getRegion()));
+
+      for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) {
+        info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
+                                                   info.isUnordered);
+      }
+
+      builder->setInsertionPointToEnd(loopWrapperOp.getBody());
+      auto loopOp = builder->create<fir::DoConcurrentLoopOp>(
+          loc, nestLBs, nestUBs, nestSts, nestReduceOperands,
+          nestReduceAttrs.empty()
+              ? nullptr
+              : mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs),
+          nullptr);
+
+      llvm::SmallVector<mlir::Type> loopBlockArgTypes(
+          incrementLoopNestInfo.size(), builder->getIndexType());
+      llvm::SmallVector<mlir::Location> loopBlockArgLocs(
+          incrementLoopNestInfo.size(), loc);
+      mlir::Region &loopRegion = loopOp.getRegion();
+      mlir::Block *loopBlock = builder->createBlock(
+          &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs);
+      builder->setInsertionPointToStart(loopBlock);
+
+      for (auto [info, blockArg] :
+           llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) {
+        info.loopOp = loopOp;
+        mlir::Value loopValue =
+            builder->createConvert(loc, info.getLoopVariableType(), blockArg);
+        builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
+
+        if (info.maskExpr) {
+          Fortran::lower::StatementContext stmtCtx;
+          mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
+          stmtCtx.finalizeAndReset();
+          mlir::Value maskCondCast =
+              builder->createConvert(loc, builder->getI1Type(), maskCond);
+          auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
+                                                 /*withElseRegion=*/false);
+          builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
+        }
+      }
+
+      IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back();
+
+      if (innermostInfo.hasLocalitySpecs())
+        handleLocalitySpecs(innermostInfo);
+
+      addLoopAnnotationAttr(innermostInfo, dirs);
+    }
   }
 
   /// Generate FIR to end a structured or unstructured increment loop nest.
@@ -2446,29 +2487,31 @@ class FirConverter : public Fortran::lower::AbstractConverter {
          it != rend; ++it) {
       IncrementLoopInfo &info = *it;
       if (info.isStructured()) {
-        // End fir.do_loop.
+        // End fir.do_concurent.loop.
         if (info.isUnordered) {
-          builder->setInsertionPointAfter(info.doLoop);
+          builder->setInsertionPointAfter(info.loopOp->getParentOp());
           continue;
         }
+
+        // End fir.do_loop.
         // Decrement tripVariable.
-        builder->setInsertionPointToEnd(info.doLoop.getBody());
+        auto doLoopOp = mlir::cast<fir::DoLoopOp>(info.loopOp);
+        builder->setInsertionPointToEnd(doLoopOp.getBody());
         llvm::SmallVector<mlir::Value, 2> results;
         results.push_back(builder->create<mlir::arith::AddIOp>(
-            loc, info.doLoop.getInductionVar(), info.doLoop.getStep(),
-            iofAttr));
+            loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr));
         // Step loopVariable to help optimizations such as vectorization.
         // Induction variable elimination will clean up as necessary.
         mlir::Value step = builder->createConvert(
-            loc, info.getLoopVariableType(), info.doLoop.getStep());
+            loc, info.getLoopVariableType(), doLoopOp.getStep());
         mlir::Value loopVar =
             builder->create<fir::LoadOp>(loc, info.loopVariable);
         results.push_back(
             builder->create<mlir::arith::AddIOp>(loc, loopVar, step, iofAttr));
         builder->create<fir::ResultOp>(loc, results);
-        builder->setInsertionPointAfter(info.doLoop);
+        builder->setInsertionPointAfter(doLoopOp);
         // The loop control variable may be used after the loop.
-        builder->create<fir::StoreOp>(loc, info.doLoop.getResult(1),
+        builder->create<fir::StoreOp>(loc, doLoopOp.getResult(1),
                                       info.loopVariable);
         continue;
       }
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b7f8a8d3a9d56..8e7f47a8f2a9b 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -279,6 +279,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
   if (auto cufKernelOp = getRegion().getParentOfType<cuf::KernelOp>())
     return &cufKernelOp.getRegion().front();
 
+  if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
+    return doConcurentOp.getBody();
+
   return getEntryBlock();
 }
 
diff --git a/flang/test/Lower/do_concurrent.f90 b/flang/test/Lower/do_concurrent.f90
index ef93d2d6b035b..cc113f59c35e3 100644
--- a/flang/test/Lower/do_concurrent.f90
+++ b/flang/test/Lower/do_concurrent.f90
@@ -14,6 +14,9 @@ subroutine sub1(n)
    implicit none
    integer :: n, m, i, j, k
    integer, dimension(n) :: a
+!CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFsub1En"}
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Ea"}
+
 !CHECK: %[[LB1:.*]] = arith.constant 1 : i32
 !CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
 !CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
@@ -29,10 +32,30 @@ subroutine sub1(n)
 !CHECK: %[[UB3:.*]] = arith.constant 10 : i32
 !CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index
 
-!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
-!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
-!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK:   %[[I:.*]] = fir.alloca i32 {bindc_name = "i"}
+!CHECK:   %[[I_DECL:.*]]:2 = hlfir.declare %[[I]]
+!CHECK:   %[[J:.*]] = fir.alloca i32 {bindc_name = "j"}
+!CHECK:   %[[J_DECL:.*]]:2 = hlfir.declare %[[J]]
+!CHECK:   %[[K:.*]] = fir.alloca i32 {bindc_name = "k"}
+!CHECK:   %[[K_DECL:.*]]:2 = hlfir.declare %[[K]]
+
+!CHECK:   fir.do_concurrent.loop (%[[I_IV:.*]], %[[J_IV:.*]], %[[K_IV:.*]]) =
+!CHECK-SAME:                     (%[[LB1_CVT]], %[[LB2_CVT]], %[[LB3_CVT]]) to
+!CHECK-SAME:                     (%[[UB1_CVT]], %[[UB2_CVT]], %[[UB3_CVT]]) step
+!CHECK-SAME:                     (%{{.*}}, %{{.*}}, %{{.*}}) {
+!CHECK:       %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32
+!CHECK:       fir.store %[[I_IV_CVT]] to %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK:       %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32
+!CHECK:       fir.store %[[J_IV_CVT]] to %[[J_DECL]]#0 : !fir.ref<i32>
+!CHECK:       %[[K_IV_CVT:.*]] = fir.convert %[[K_IV]] : (index) -> i32
+!CHECK:       fir.store %[[K_IV_CVT]] to %[[K_DECL]]#0 : !fir.ref<i32>
 
+!CHECK:       %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref<i32>
+!CHECK:       %[[I_VAL:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK:       %[[I_VAL_CVT:.*]] = fir.convert %[[I_VAL]] : (i32) -> i64
+!CHECK:       %[[A_ELEM:.*]] = hlfir.designate %[[A_DECL]]#0 (%[[I_VAL_CVT]])
+!CHECK:       hlfir.assign %[[N_VAL]] to %[[A_ELEM]] : i32, !fir.ref<i32>
    do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
       a(i) = n
    end do
@@ -45,14 +68,17 @@ subroutine sub2(n)
    integer, dimension(n) :: a
 !CHECK: %[[LB1:.*]] = arith.constant 1 : i32
 !CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
-!CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32>
+!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
 !CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK:   fir.do_concurrent.loop (%{{.*}}) = (%[[LB1_CVT]]) to (%[[UB1_CVT]]) step (%{{.*}})
+
 !CHECK: %[[LB2:.*]] = arith.constant 1 : i32
 !CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
 !CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
 !CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
-!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
+!CHECK: fir.do_concurrent
+!CHECK:   fir.do_concurrent.loop (%{{.*}}) = (%[[LB2_CVT]]) to (%[[UB2_CVT]]) step (%{{.*}})
    do concurrent(i=1:n)
       do concurrent(j=1:bar(n*m, n/m))
          a(i) = n
@@ -60,7 +86,6 @@ subroutine sub2(n)
    end do
 end subroutine
 
-
 !CHECK-LABEL: unstructured
 subroutine unstructured(inner_step)
   integer(4) :: i, j, inner_step
diff --git a/flang/test/Lower/do_concurrent_local_default_init.f90 b/flang/test/Lower/do_concurrent_local_default_init.f90
index 7652e4fcd0402..207704ac1a990 100644
--- a/flang/test/Lower/do_concurrent_local_default_init.f90
+++ b/flang/test/Lower/do_concurrent_local_default_init.f90
@@ -29,7 +29,7 @@ subroutine test_default_init()
 ! CHECK-SAME:                           %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>> {fir.bindc_name = "p"}) {
 ! CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>
 ! CHECK:           %[[VAL_7:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>) -> index
-! CHECK:           fir.do_loop
+! CHECK:           fir.do_concurrent.loop
 ! CHECK:             %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>> {bindc_name = "p", pinned, uniq_name = "_QFtest_ptrEp"}
 ! CHECK:             %[[VAL_17:.*]] = fir.zero_bits !fir.ptr<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 0 : index
@@ -43,7 +43,7 @@ subroutine test_default_init()
 ! CHECK:         }
 
 ! CHECK-LABEL:   func.func @_QPtest_default_init(
-! CHECK:           fir.do_loop
+! CHECK:           fir.do_concurrent.loop
 ! CHECK:             %[[VAL_26:.*]] = fir.alloca !fir.type<_QFtest_default_initTt{i:i32}> {bindc_name = "a", pinned, uniq_name = "_QFtest_default_initEa"}
 ! CHECK:             %[[VAL_27:.*]] = fir.embox %[[VAL_26]] : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>
 ! CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<none>
diff --git a/flang/test/Lower/loops.f90 b/flang/test/Lower/loops.f90
index ea65ba3e4d66d..60df27a591dc3 100644
--- a/flang/test/Lower/loops.f90
+++ b/flang/test/Lower/loops.f90
@@ -2,15 +2,6 @@
 
 ! CHECK-LABEL: loop_test
 subroutine loop_test
-  ! CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i"}
-  ! CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i"}
-  ! CHECK: %[[VAL_4:.*]] = fir.alloca i16 {bindc_name = "i"}
-  ! CHECK: %[[VAL_5:.*]] = fir.alloca i8 {bindc_name = "k"}
-  ! CHECK: %[[VAL_6:.*]] = fir.alloca i8 {bindc_name = "j"}
-  ! CHECK: %[[VAL_7:.*]] = fir.alloca i8 {bindc_name = "i"}
-  ! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "k"}
-  ! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "j"}
-  ! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i"}
   ! CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<5x5x5xi32> {bindc_name = "a", uniq_name = "_QFloop_testEa"}
   ! CHECK...
[truncated]

ergawy · 2025-03-26T15:02:28Z

Please have a look when you have time 🙏!

tblah

Thanks for working on this Kareem. Just a few questions.

Could this be made any simpler by using hlfir::genLoopNest? No worries if it isn't a good fit.

tblah · 2025-03-26T16:58:53Z

flang/lib/Lower/Bridge.cpp

-        if (boundsAndStepIP)
-          builder->setInsertionPointAfter(boundsAndStepIP);
+    for (IncrementLoopInfo &info : incrementLoopNestInfo) {
+      genDoConcurrent = info.isStructured() && info.isUnordered;


The isUnordered field has a comment saying that it is also used for FORALL (although that may not still be true in the HLFIR lowering path). I think it would be better to have a field that specifically says this is supposed to be a do concurrent.

isUnordered is also used for array syntax.

I might be missing something, but from the code, it seems like IncrementLoopNestInfo::isUnordered is set to true only for do concurrent loops (in getConcurrentControl). So at this stage, it might be safe to just rename it to isDoConcurrent, I think.

Renaming is fine for me. That will make it clear this is the only use and prevent future conflation of uses.

tblah · 2025-03-26T17:05:26Z

flang/lib/Lower/Bridge.cpp

+        builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+      }
+
+      if (genDoConcurrent && nestReduceOperands.empty()) {


Why is the check for nestReduceOperands.empty() needed here?

Because getConcurrentControl (https://github.com/llvm/llvm-project/blob/main/flang/lib/Lower/Bridge.cpp#L1998) adds reductions to all IncrementLoopInfo instances of the nest.

But if people more familiar with this part think it is a good idea, we can just add the reductions to the last IncrementLoopInfo instance like other locality specifiers.

Thanks for explaining. I am not particularly familiar with this bit of code but only adding them to the last IncrementLoopInfo sounds okay to me.

tblah · 2025-03-26T17:09:11Z

flang/lib/Lower/Bridge.cpp

      // Structured loop - generate fir.do_loop.
      if (info.isStructured()) {
+        if (info.isUnordered)


I think it would be clearer to check here for genDoConcurrent

clementval · 2025-03-26T21:41:58Z

flang/lib/Optimizer/Builder/FIRBuilder.cpp

+  if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
+    return doConcurentOp.getBody();


Do we need this for any allocation in the do concurrent? We did not have this so far so I'm just wondering.

We do it this way for the new fir.do_concurrent and fir.do_concurrent.loop ops to keep all info related to the loop nest closely located in the IR. The allocations needed here are for the nest iteration variables. See: https://discourse.llvm.org/t/modeling-do-concurrent-loops-in-the-fir-dialect/84950/6?u=bob.belcher and related discussion and https://github.com/llvm/llvm-project/blob/main/flang/include/flang/Optimizer/Dialect/FIROps.td#L3457.

Adds support for lowering `do concurrent` nests from PFT to the new `fir.do_concurrent` MLIR op as well as its special terminator `fir.do_concurrent.loop` which models the actual loop nest. To that end, this PR emits the allocations for the iteration variables within the block of the `fir.do_concurrent` op and creates a region for the `fir.do_concurrent.loop` op that accepts arguments equal in number to the number of the input `do concurrent` iteration ranges. For example, given the following input: ```fortran do concurrent(i=1:10, j=11:20) end do ``` the changes in this PR emit the following MLIR: ```mlir fir.do_concurrent { %22 = fir.alloca i32 {bindc_name = "i"} %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) %24 = fir.alloca i32 {bindc_name = "j"} %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { %26 = fir.convert %arg1 : (index) -> i32 fir.store %26 to %23#0 : !fir.ref<i32> %27 = fir.convert %arg2 : (index) -> i32 fir.store %27 to %25#0 : !fir.ref<i32> } } ```

ergawy · 2025-03-27T04:26:50Z

Could this be made any simpler by using hlfir::genLoopNest? No worries if it isn't a good fit.

I didn't know about this util, thanks for pointing it out. I think it won't work out of the box since genLoopNest assumes iteration ranges start from 1. I think it can be generalized but maybe in a separate PR?

tblah

LGTM once @clementval is happy

tblah · 2025-03-27T15:38:03Z

flang/lib/Lower/Bridge.cpp

@@ -97,7 +97,8 @@ struct IncrementLoopInfo {
                             bool isUnordered = false)
      : loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)},
        upperExpr{Fortran::semantics::GetExpr(upper)},
-        stepExpr{Fortran::semantics::GetExpr(step)}, isUnordered{isUnordered} {}
+        stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isUnordered} {


nit

Suggested change

stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isUnordered} {

stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isConcurrent} {

tblah · 2025-03-27T16:04:37Z

flang/lib/Lower/Bridge.cpp

+        builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
+      }
+
+      if (genDoConcurrent && nestReduceOperands.empty()) {


Thanks for explaining. I am not particularly familiar with this bit of code but only adding them to the last IncrementLoopInfo sounds okay to me.

llvmbot added flang flang:fir-hlfir labels Mar 25, 2025

ergawy requested review from clementval, tblah, kiranchandramohan, jeanPerier and vdonaldson March 25, 2025 09:28

ergawy force-pushed the users/ergawy/pft_to_do_concurrent branch from c85c0ef to 421e550 Compare March 25, 2025 09:41

IanTaylerLessa-arm mentioned this pull request Mar 25, 2025

[mlir][tosa] Avoid overflow in reduction folders #132786

Merged

ergawy force-pushed the users/ergawy/pft_to_do_concurrent branch from 421e550 to 252093c Compare March 25, 2025 11:34

tblah reviewed Mar 26, 2025

View reviewed changes

clementval reviewed Mar 26, 2025

View reviewed changes

handle review comments

4cefbea

ergawy force-pushed the users/ergawy/pft_to_do_concurrent branch from 252093c to 4cefbea Compare March 27, 2025 04:48

ergawy added 2 commits March 27, 2025 00:11

update comment

2b9751c

rename isUnordered

2122290

ergawy force-pushed the users/ergawy/pft_to_do_concurrent branch from 58bcb4b to 2122290 Compare March 27, 2025 14:08

tblah approved these changes Mar 27, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` #132904

[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` #132904

ergawy commented Mar 25, 2025

llvmbot commented Mar 25, 2025

ergawy commented Mar 26, 2025

tblah left a comment •

edited

Loading

tblah Mar 26, 2025

clementval Mar 26, 2025

ergawy Mar 27, 2025 •

edited

Loading

tblah Mar 27, 2025

ergawy Mar 27, 2025

tblah Mar 26, 2025

ergawy Mar 27, 2025

tblah Mar 27, 2025

tblah Mar 26, 2025

ergawy Mar 27, 2025

clementval Mar 26, 2025

ergawy Mar 27, 2025 •

edited

Loading

ergawy commented Mar 27, 2025

tblah left a comment

tblah Mar 27, 2025 •

edited

Loading

tblah Mar 27, 2025

		if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
		return doConcurentOp.getBody();

	stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isUnordered} {
	stepExpr{Fortran::semantics::GetExpr(step)}, isConcurrent{isConcurrent} {

[flang][fir] Lower do concurrent loop nests to fir.do_concurrent #132904

Are you sure you want to change the base?

[flang][fir] Lower do concurrent loop nests to fir.do_concurrent #132904

Conversation

ergawy commented Mar 25, 2025

llvmbot commented Mar 25, 2025

ergawy commented Mar 26, 2025

tblah left a comment • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ergawy Mar 27, 2025 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ergawy Mar 27, 2025 • edited Loading

Choose a reason for hiding this comment

ergawy commented Mar 27, 2025

tblah left a comment

Choose a reason for hiding this comment

tblah Mar 27, 2025 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` #132904

[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` #132904

tblah left a comment •

edited

Loading

ergawy Mar 27, 2025 •

edited

Loading

ergawy Mar 27, 2025 •

edited

Loading

tblah Mar 27, 2025 •

edited

Loading