Merge pull request #1574 from Kenneth-T-Moore/ken1

Added Stall Detection to the nonlinear solvers.
OpenMDAO · Aug 5, 2020 · c253264 · c253264
2 parents 1ef88ff + 8c59abe
commit c253264
Show file tree

Hide file tree

Showing 5 changed files with 290 additions and 78 deletions.
diff --git a/openmdao/docs/features/building_blocks/solvers/nonlinear/broyden.rst b/openmdao/docs/features/building_blocks/solvers/nonlinear/broyden.rst
@@ -106,3 +106,24 @@ number of iterations, though keep in mind that solving for the derivatives adds
   .. embed-code::
       openmdao.solvers.nonlinear.tests.test_broyden.TestBryodenFeature.test_circuit_options
       :layout: code, output
+
+**stall_limit and stall_tol**
+
+  In some cases, a nonlinear solver can stall: the norm of the residual stops changing from one iteration
+  to the next. This can happen for a couple of reasons. The solver may hit numerical noise and just wander
+  around in a circle, or it may get stuck on a bound so that the line search keeps running into the same
+  spot no matter what. Either way, if you allow, say, 100 iterations and the solver stalls at iteration 15,
+  the remaining iterations are wasted compute time. To remedy this, you can turn on stall detection in any
+  nonlinear solver by setting the "stall_limit" option to a number greater than zero.
+
+  In this example, we set stall_limit to 3. While the solver iterates, it compares the relative residual
+  norm to the value from the previous iteration. If the value is unchanged for three iterations in a row,
+  then iteration terminates due to detection of a stall. If "err_on_non_converge" is set to True, then
+  an ``AnalysisError`` is raised, just as if the solver had reached the iteration count limit.
+
+  We also set `stall_tol` to 1e-6. A change in the relative residual norm that is no larger than this
+  threshold is treated as no change at all.
+
+  .. embed-code::
+      openmdao.solvers.tests.test_solver_features.TestSolverFeatures.test_feature_stall_detection_broyden
+      :layout: interleave
diff --git a/openmdao/docs/features/building_blocks/solvers/nonlinear/newton.rst b/openmdao/docs/features/building_blocks/solvers/nonlinear/newton.rst
@@ -113,6 +113,28 @@ NewtonSolver Option Examples
 
   This feature can be set on any iterative nonlinear or linear solver.
 
+**stall_limit and stall_tol**
+
+  In some cases, a nonlinear solver can stall: the norm of the residual stops changing from one iteration
+  to the next. This can happen for a couple of reasons. The solver may hit numerical noise and just wander
+  around in a circle, or it may get stuck on a bound so that the line search keeps running into the same
+  spot no matter what. Either way, if you allow, say, 100 iterations and the solver stalls at iteration 15,
+  the remaining iterations are wasted compute time. To remedy this, you can turn on stall detection in any
+  nonlinear solver by setting the "stall_limit" option to a number greater than zero.
+
+  In this example, we set stall_limit to 3. While the solver iterates, it compares the relative residual
+  norm to the value from the previous iteration. If the value is unchanged for three iterations in a row,
+  then iteration terminates due to detection of a stall. If "err_on_non_converge" is set to True, then
+  an ``AnalysisError`` is raised, just as if the solver had reached the iteration count limit.
+
+  We also set `stall_tol` to 1e-6. A change in the relative residual norm that is no larger than this
+  threshold is treated as no change at all.
+
+  .. embed-code::
+      openmdao.solvers.tests.test_solver_features.TestSolverFeatures.test_feature_stall_detection_newton
+      :layout: interleave
+
+
 Specifying a Linear Solver
 --------------------------
 

diff --git a/openmdao/solvers/linesearch/backtracking.py b/openmdao/solvers/linesearch/backtracking.py
@@ -225,10 +225,8 @@ def _solve(self):
 
             self._run_apply()
             norm = self._iter_get_norm()
-            # With solvers, we want to record the norm AFTER
-            # the call, but the call needs to
-            # be wrapped in the with for stack purposes,
-            # so we locally assign  norm & norm0 into the class.
+
+            # Save the norm values in the context manager so they can also be recorded.
             rec.abs = norm
             rec.rel = norm / norm0
 
@@ -444,9 +442,7 @@ def _solve(self):
 
                     phi = self._line_search_objective()
 
-                    # With solvers, we want to report the norm AFTER
-                    # the iter_execute call, but the i_e call needs to
-                    # be wrapped in the with for stack purposes.
+                    # Save the norm values in the context manager so they can also be recorded.
                     rec.abs = phi
                     rec.rel = phi / phi0
 

diff --git a/openmdao/solvers/solver.py b/openmdao/solvers/solver.py
@@ -374,77 +374,6 @@ def _mpi_print_header(self):
         """
         pass
 
-    def _solve(self):
-        """
-        Run the iterative solver.
-        """
-        maxiter = self.options['maxiter']
-        atol = self.options['atol']
-        rtol = self.options['rtol']
-        iprint = self.options['iprint']
-
-        self._mpi_print_header()
-
-        self._iter_count = 0
-        norm0, norm = self._iter_initialize()
-
-        self._norm0 = norm0
-
-        self._mpi_print(self._iter_count, norm, norm / norm0)
-
-        while self._iter_count < maxiter and norm > atol and norm / norm0 > rtol:
-            with Recording(type(self).__name__, self._iter_count, self) as rec:
-                self._single_iteration()
-                self._iter_count += 1
-                self._run_apply()
-                norm = self._iter_get_norm()
-                # With solvers, we want to record the norm AFTER the call, but the call needs to
-                # be wrapped in the with for stack purposes, so we locally assign  norm & norm0
-                # into the class.
-                rec.abs = norm
-                if norm0 == 0:
-                    norm0 = 1
-                rec.rel = norm / norm0
-
-            self._mpi_print(self._iter_count, norm, norm / norm0)
-
-        system = self._system()
-        if system.comm.rank == 0 or os.environ.get('USE_PROC_FILES'):
-            prefix = self._solver_info.prefix + self.SOLVER
-
-            # Solver terminated early because a Nan in the norm doesn't satisfy the while-loop
-            # conditionals.
-            if np.isinf(norm) or np.isnan(norm):
-                msg = "Solver '{}' on system '{}': residuals contain 'inf' or 'NaN' after {} " + \
-                      "iterations."
-                if iprint > -1:
-                    print(prefix + msg.format(self.SOLVER, system.pathname,
-                                              self._iter_count))
-
-                # Raise AnalysisError if requested.
-                if self.options['err_on_non_converge']:
-                    raise AnalysisError(msg.format(self.SOLVER, system.pathname,
-                                                   self._iter_count))
-
-            # Solver hit maxiter without meeting desired tolerances.
-            elif (norm > atol and norm / norm0 > rtol):
-                msg = "Solver '{}' on system '{}' failed to converge in {} iterations."
-
-                if iprint > -1:
-                    print(prefix + msg.format(self.SOLVER, system.pathname,
-                                              self._iter_count))
-
-                # Raise AnalysisError if requested.
-                if self.options['err_on_non_converge']:
-                    raise AnalysisError(msg.format(self.SOLVER, system.pathname,
-                                                   self._iter_count))
-
-            # Solver converged
-            elif iprint == 1:
-                print(prefix + ' Converged in {} iterations'.format(self._iter_count))
-            elif iprint == 2:
-                print(prefix + ' Converged')
-
     def _iter_initialize(self):
         """
         Perform any necessary pre-processing operations.
@@ -597,6 +526,14 @@ def _declare_options(self):
                              desc='If true, the values of input and output variables at '
                                   'the start of iteration are printed and written to a file '
                                   'after a failure to converge.')
+        self.options.declare('stall_limit', default=0,
+                             desc='Number of iterations after which, if the residual norms are '
+                                  'identical within the stall_tol, then terminate as if max '
+                                  'iterations were reached. Default is 0, which disables this '
+                                  'feature.')
+        self.options.declare('stall_tol', default=1e-12,
+                             desc='When stall checking is enabled, the threshold below which the '
+                                  'residual norm is considered unchanged.')
 
     def solve(self):
         """
@@ -633,6 +570,100 @@ def _iter_initialize(self):
         norm0 = norm if norm != 0.0 else 1.0
         return norm0, norm
 
+    def _solve(self):
+        """
+        Run the iterative solver.
+
+        Iteration ends when atol or rtol is satisfied, when maxiter is reached, or, if the
+        stall_limit option is greater than zero, when the relative residual norm has stayed
+        within stall_tol of its reference value for stall_limit consecutive iterations.
+
+        Raises
+        ------
+        AnalysisError
+            If err_on_non_converge is True and the solver stalled, did not converge, or
+            produced an inf or NaN residual norm.
+        """
+        maxiter = self.options['maxiter']
+        atol = self.options['atol']
+        rtol = self.options['rtol']
+        iprint = self.options['iprint']
+        stall_limit = self.options['stall_limit']
+        stall_tol = self.options['stall_tol']
+
+        self._mpi_print_header()
+
+        self._iter_count = 0
+        norm0, norm = self._iter_initialize()
+
+        self._norm0 = norm0
+
+        self._mpi_print(self._iter_count, norm, norm / norm0)
+
+        stalled = False
+        stall_count = 0
+        # The stall check compares relative norms, so the reference must be relative too.
+        stall_norm = norm / norm0
+
+        while self._iter_count < maxiter and norm > atol and norm / norm0 > rtol and not stalled:
+            with Recording(type(self).__name__, self._iter_count, self) as rec:
+                self._single_iteration()
+                self._iter_count += 1
+                self._run_apply()
+                norm = self._iter_get_norm()
+
+                # Save the norm values in the context manager so they can also be recorded.
+                rec.abs = norm
+                # Guard the division below against a zero initial norm.
+                if norm0 == 0:
+                    norm0 = 1
+                rec.rel = norm / norm0
+
+                # Check if convergence is stalled.
+                if stall_limit > 0:
+                    if np.abs(stall_norm - rec.rel) <= stall_tol:
+                        stall_count += 1
+                        stalled = stall_count >= stall_limit
+                    else:
+                        # The norm moved, so restart the count from this new reference.
+                        stall_count = 0
+                        stall_norm = rec.rel
+
+            self._mpi_print(self._iter_count, norm, norm / norm0)
+
+        system = self._system()
+        if system.comm.rank == 0 or os.environ.get('USE_PROC_FILES'):
+            prefix = self._solver_info.prefix + self.SOLVER
+
+            # A NaN norm fails the while-loop comparisons and ends iteration early; check
+            # for it first so that it is not reported as convergence.
+            if np.isinf(norm) or np.isnan(norm):
+                msg = "Solver '{}' on system '{}': residuals contain 'inf' or 'NaN' after {} " + \
+                      "iterations."
+            elif stalled:
+                msg = "Solver '{}' on system '{}' stalled after {} iterations."
+            elif norm > atol and norm / norm0 > rtol:
+                # Solver hit maxiter without meeting desired tolerances.
+                msg = "Solver '{}' on system '{}' failed to converge in {} iterations."
+            else:
+                msg = None
+
+            # Every failure mode is reported the same way: print, then optionally raise.
+            if msg is not None:
+                msg = msg.format(self.SOLVER, system.pathname, self._iter_count)
+                if iprint > -1:
+                    print(prefix + msg)
+
+                # Raise AnalysisError if requested.
+                if self.options['err_on_non_converge']:
+                    raise AnalysisError(msg)
+
+            # Solver converged
+            elif iprint == 1:
+                print(prefix + ' Converged in {} iterations'.format(self._iter_count))
+            elif iprint == 2:
+                print(prefix + ' Converged')
+
     def _run_apply(self):
         """
         Run the apply_nonlinear method on the system.
@@ -784,6 +815,76 @@ def solve(self, vec_names, mode, rel_systems=None):
         """
         raise NotImplementedError("class %s does not implement solve()." % (type(self).__name__))
 
+    def _solve(self):
+        """
+        Drive the linear solver's iteration loop and report the outcome.
+
+        Iterates until maxiter is reached or either atol or rtol is met, then prints a
+        status message according to iprint.
+
+        Raises
+        ------
+        AnalysisError
+            If err_on_non_converge is True and the solve did not converge or the
+            residual norm is inf or NaN.
+        """
+        options = self.options
+        iter_limit = options['maxiter']
+        abs_tol = options['atol']
+        rel_tol = options['rtol']
+        print_level = options['iprint']
+
+        self._mpi_print_header()
+
+        self._iter_count = 0
+        norm0, norm = self._iter_initialize()
+
+        self._norm0 = norm0
+
+        self._mpi_print(self._iter_count, norm, norm / norm0)
+
+        while self._iter_count < iter_limit and norm > abs_tol and norm / norm0 > rel_tol:
+            with Recording(type(self).__name__, self._iter_count, self) as rec:
+                self._single_iteration()
+                self._iter_count += 1
+                self._run_apply()
+                norm = self._iter_get_norm()
+
+                # Store both norms on the recording context so they can also be recorded.
+                rec.abs = norm
+                if norm0 == 0:
+                    norm0 = 1
+                rec.rel = norm / norm0
+
+            self._mpi_print(self._iter_count, norm, norm / norm0)
+
+        system = self._system()
+        # Only rank 0 (or every rank when USE_PROC_FILES is set) reports the outcome.
+        if not (system.comm.rank == 0 or os.environ.get('USE_PROC_FILES')):
+            return
+
+        prefix = self._solver_info.prefix + self.SOLVER
+
+        def report_failure(template):
+            # Print the failure (unless silenced) and raise if the user asked for that.
+            text = template.format(self.SOLVER, system.pathname, self._iter_count)
+            if print_level > -1:
+                print(prefix + text)
+            if self.options['err_on_non_converge']:
+                raise AnalysisError(text)
+
+        if np.isinf(norm) or np.isnan(norm):
+            # A NaN norm fails the while-loop comparisons, which ends iteration early.
+            report_failure("Solver '{}' on system '{}': residuals contain 'inf' or 'NaN' after {} "
+                           "iterations.")
+        elif norm > abs_tol and norm / norm0 > rel_tol:
+            # maxiter was reached without meeting either tolerance.
+            report_failure("Solver '{}' on system '{}' failed to converge in {} iterations.")
+        elif print_level == 1:
+            print(prefix + ' Converged in {} iterations'.format(self._iter_count))
+        elif print_level == 2:
+            print(prefix + ' Converged')
+
     def _run_apply(self):
         """
         Run the apply_linear method on the system.