From db7d088431c05b800c375ba2b32ff89183e05529 Mon Sep 17 00:00:00 2001 From: Bret Naylor Date: Thu, 11 Jan 2024 18:16:25 -0500 Subject: [PATCH 1/3] fixed bug in _setup_iteration_lists when grad_groups existed on some ranks and not others --- openmdao/core/group.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openmdao/core/group.py b/openmdao/core/group.py index 33deeeb9c0..0839e0f0e9 100644 --- a/openmdao/core/group.py +++ b/openmdao/core/group.py @@ -5106,11 +5106,11 @@ def _setup_iteration_lists(self): graph.add_edge(opt_sys, response0) graph.add_edge(response0, opt_sys) - if grad_groups: - if self.comm.size > 1: - grad_groups = self.comm.allgather(grad_groups) - grad_groups = {g for grp in grad_groups for g in grp} + if self.comm.size > 1: + grad_groups = self.comm.allgather(grad_groups) + grad_groups = {g for grp in grad_groups for g in grp} + if grad_groups: remaining = set(grad_groups) for name in sorted(grad_groups, key=lambda x: x.count('.')): prefix = name + '.' From 41e1d16cc79d10ce88ec22b36d41b1a620bd3569 Mon Sep 17 00:00:00 2001 From: Bret Naylor Date: Fri, 12 Jan 2024 09:50:52 -0500 Subject: [PATCH 2/3] fixed MPI hang/singular matrix error --- openmdao/core/group.py | 2 ++ openmdao/solvers/nonlinear/nonlinear_block_jac.py | 2 +- openmdao/solvers/nonlinear/nonlinear_runonce.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/openmdao/core/group.py b/openmdao/core/group.py index 0839e0f0e9..b4750b9318 100644 --- a/openmdao/core/group.py +++ b/openmdao/core/group.py @@ -5110,6 +5110,8 @@ def _setup_iteration_lists(self): grad_groups = self.comm.allgather(grad_groups) grad_groups = {g for grp in grad_groups for g in grp} + print(f"GRAD GROUPS:", sorted(grad_groups)) + if grad_groups: remaining = set(grad_groups) for name in sorted(grad_groups, key=lambda x: x.count('.')): diff --git a/openmdao/solvers/nonlinear/nonlinear_block_jac.py b/openmdao/solvers/nonlinear/nonlinear_block_jac.py index 1c847559af..191c64bb54 100644 --- a/openmdao/solvers/nonlinear/nonlinear_block_jac.py +++ b/openmdao/solvers/nonlinear/nonlinear_block_jac.py @@ -29,7 +29,7 @@ def _single_iteration(self): # If this is a parallel group, check for analysis errors and reraise. if len(system._subsystems_myproc) != len(system._subsystems_allprocs): with multi_proc_fail_check(system.comm): - for subsys in system._subsystems_myproc: + for subsys in system._solver_subsystem_iter(local_only=True): subsys._solve_nonlinear() else: for subsys in system._solver_subsystem_iter(local_only=True): diff --git a/openmdao/solvers/nonlinear/nonlinear_runonce.py b/openmdao/solvers/nonlinear/nonlinear_runonce.py index 3cf6d07aa9..86ef3ccadb 100644 --- a/openmdao/solvers/nonlinear/nonlinear_runonce.py +++ b/openmdao/solvers/nonlinear/nonlinear_runonce.py @@ -37,7 +37,7 @@ def solve(self): system._transfer('nonlinear', 'fwd') with multi_proc_fail_check(system.comm): - for subsys in system._subsystems_myproc: + for subsys in system._solver_subsystem_iter(local_only=True): subsys._solve_nonlinear() # If this is not a parallel group, transfer for each subsystem just prior to running it. From 5f8b9cd7233435aa0b896fa6df82c7ef4bddd7a2 Mon Sep 17 00:00:00 2001 From: Bret Naylor Date: Fri, 12 Jan 2024 12:59:22 -0500 Subject: [PATCH 3/3] added test --- openmdao/core/group.py | 2 - openmdao/core/tests/test_pre_post_iter.py | 211 ++++++++++++---------- 2 files changed, 120 insertions(+), 93 deletions(-) diff --git a/openmdao/core/group.py b/openmdao/core/group.py index b4750b9318..0839e0f0e9 100644 --- a/openmdao/core/group.py +++ b/openmdao/core/group.py @@ -5110,8 +5110,6 @@ def _setup_iteration_lists(self): grad_groups = self.comm.allgather(grad_groups) grad_groups = {g for grp in grad_groups for g in grp} - print(f"GRAD GROUPS:", sorted(grad_groups)) - if grad_groups: remaining = set(grad_groups) for name in sorted(grad_groups, key=lambda x: x.count('.')): diff --git a/openmdao/core/tests/test_pre_post_iter.py b/openmdao/core/tests/test_pre_post_iter.py index b0d8ba1ef3..0e2d0d0639 100644 --- a/openmdao/core/tests/test_pre_post_iter.py +++ b/openmdao/core/tests/test_pre_post_iter.py @@ -6,6 +6,12 @@ from openmdao.test_suite.components.exec_comp_for_test import ExecComp4Test from openmdao.test_suite.components.sellar import SellarDis1withDerivatives, SellarDis2withDerivatives from openmdao.utils.testing_utils import use_tempdirs +from openmdao.utils.mpi import MPI + +try: + from openmdao.parallel_api import PETScVector +except ImportError: + PETScVector = None class MissingPartialsComp(om.ExplicitComponent): @@ -28,97 +34,109 @@ def compute_partials(self, inputs, partials, discrete_inputs=None): partials['z', 'b'] = 3.0 -@use_tempdirs -class TestPrePostIter(unittest.TestCase): +def setup_problem(do_pre_post_opt, mode, use_ivc=False, coloring=False, size=3, group=False, + force=(), approx=False, force_complex=False, recording=False, parallel=False): + prob = om.Problem() + prob.options['group_by_pre_opt_post'] = do_pre_post_opt - def setup_problem(self, do_pre_post_opt, mode, use_ivc=False, coloring=False, size=3, group=False, - force=(), approx=False, force_complex=False, recording=False): - prob = om.Problem() - prob.options['group_by_pre_opt_post'] = do_pre_post_opt + prob.driver = om.ScipyOptimizeDriver(optimizer='SLSQP', disp=False) + prob.set_solver_print(level=0) - prob.driver = om.ScipyOptimizeDriver(optimizer='SLSQP', disp=False) - prob.set_solver_print(level=0) + model = prob.model - model = prob.model + if approx: + model.approx_totals() - if approx: - model.approx_totals() - - if use_ivc: - model.add_subsystem('ivc', om.IndepVarComp('x', np.ones(size))) + if use_ivc: + model.add_subsystem('ivc', om.IndepVarComp('x', np.ones(size))) - if group: - G1 = model.add_subsystem('G1', om.Group(), promotes=['*']) - G2 = model.add_subsystem('G2', om.Group(), promotes=['*']) + if parallel: + par = model.add_subsystem('par', om.ParallelGroup(), promotes=['*']) + par.nonlinear_solver = om.NonlinearBlockJac() + parent = par + else: + parent = model + + if group: + G1 = parent.add_subsystem('G1', om.Group(), promotes=['*']) + G2 = parent.add_subsystem('G2', om.Group(), promotes=['*']) + if parallel: + G2.nonlinear_solver = om.NewtonSolver(solve_subsystems=False) + G2.linear_solver = om.DirectSolver(assemble_jac=True) + # this guy wouldn't be included in the iteration loop were it not under Newton + G1.add_subsystem('sub_post_comp', ExecComp4Test('y=.5*x', x=np.ones(size), y=np.zeros(size))) + else: + G1 = parent + G2 = parent + + comps = { + 'pre1': G1.add_subsystem('pre1', ExecComp4Test('y=2.*x', x=np.ones(size), y=np.zeros(size))), + 'pre2': G1.add_subsystem('pre2', ExecComp4Test('y=3.*x - 7.*xx', x=np.ones(size), xx=np.ones(size), y=np.zeros(size))), + + 'iter1': G1.add_subsystem('iter1', ExecComp4Test('y=x1 + x2*4. + x3', + x1=np.ones(size), x2=np.ones(size), + x3=np.ones(size), y=np.zeros(size))), + 'iter2': G1.add_subsystem('iter2', ExecComp4Test('y=.5*x', x=np.ones(size), y=np.zeros(size))), + 'iter4': G2.add_subsystem('iter4', ExecComp4Test('y=7.*x', x=np.ones(size), y=np.zeros(size))), + 'iter3': G2.add_subsystem('iter3', ExecComp4Test('y=6.*x', x=np.ones(size), y=np.zeros(size))), + + 'post1': G2.add_subsystem('post1', ExecComp4Test('y=8.*x', x=np.ones(size), y=np.zeros(size))), + 'post2': G2.add_subsystem('post2', ExecComp4Test('y=x1*9. + x2*5. + x3*3.', x1=np.ones(size), + x2=np.ones(size), x3=np.zeros(size), + y=np.zeros(size))), + } + + for name in force: + if name in comps: + comps[name].options['always_opt'] = True else: - G1 = model - G2 = model + raise RuntimeError(f'"{name}" not in comps') - comps = { - 'pre1': G1.add_subsystem('pre1', ExecComp4Test('y=2.*x', x=np.ones(size), y=np.zeros(size))), - 'pre2': G1.add_subsystem('pre2', ExecComp4Test('y=3.*x - 7.*xx', x=np.ones(size), xx=np.ones(size), y=np.zeros(size))), + if use_ivc: + model.connect('ivc.x', 'iter1.x3') - 'iter1': G1.add_subsystem('iter1', ExecComp4Test('y=x1 + x2*4. + x3', - x1=np.ones(size), x2=np.ones(size), - x3=np.ones(size), y=np.zeros(size))), - 'iter2': G1.add_subsystem('iter2', ExecComp4Test('y=.5*x', x=np.ones(size), y=np.zeros(size))), - 'iter4': G2.add_subsystem('iter4', ExecComp4Test('y=7.*x', x=np.ones(size), y=np.zeros(size))), - 'iter3': G2.add_subsystem('iter3', ExecComp4Test('y=6.*x', x=np.ones(size), y=np.zeros(size))), + model.connect('pre1.y', ['iter1.x1', 'post2.x1', 'pre2.xx']) + model.connect('pre2.y', 'iter1.x2') + model.connect('iter1.y', ['iter2.x', 'iter4.x']) + model.connect('iter2.y', 'post2.x2') + model.connect('iter3.y', 'post1.x') + model.connect('iter4.y', 'iter3.x') + model.connect('post1.y', 'post2.x3') - 'post1': G2.add_subsystem('post1', ExecComp4Test('y=8.*x', x=np.ones(size), y=np.zeros(size))), - 'post2': G2.add_subsystem('post2', ExecComp4Test('y=x1*9. + x2*5. + x3*3.', x1=np.ones(size), - x2=np.ones(size), x3=np.zeros(size), - y=np.zeros(size))), - } + prob.model.add_design_var('iter1.x3', lower=-10, upper=10) + prob.model.add_constraint('iter2.y', upper=10.) + prob.model.add_objective('iter3.y', index=0) - for name in force: - if name in comps: - comps[name].options['always_opt'] = True - else: - raise RuntimeError(f'"{name}" not in comps') + if coloring: + prob.driver.declare_coloring() - if use_ivc: - model.connect('ivc.x', 'iter1.x3') + if recording: + model.recording_options['record_inputs'] = True + model.recording_options['record_outputs'] = True + model.recording_options['record_residuals'] = True - model.connect('pre1.y', ['iter1.x1', 'post2.x1', 'pre2.xx']) - model.connect('pre2.y', 'iter1.x2') - model.connect('iter1.y', ['iter2.x', 'iter4.x']) - model.connect('iter2.y', 'post2.x2') - model.connect('iter3.y', 'post1.x') - model.connect('iter4.y', 'iter3.x') - model.connect('post1.y', 'post2.x3') + recorder = om.SqliteRecorder("sqlite_test_pre_post", record_viewer_data=False) - prob.model.add_design_var('iter1.x3', lower=-10, upper=10) - prob.model.add_constraint('iter2.y', upper=10.) - prob.model.add_objective('iter3.y', index=0) + model.add_recorder(recorder) + prob.driver.add_recorder(recorder) - if coloring: - prob.driver.declare_coloring() + for comp in comps.values(): + comp.add_recorder(recorder) - if recording: - model.recording_options['record_inputs'] = True - model.recording_options['record_outputs'] = True - model.recording_options['record_residuals'] = True + prob.setup(mode=mode, force_alloc_complex=force_complex) - recorder = om.SqliteRecorder("sqlite_test_pre_post", record_viewer_data=False) + # we don't want ExecComps to be colored because it makes the iter counting more complicated. + for comp in model.system_iter(recurse=True, typ=ExecComp4Test): + comp.options['do_coloring'] = False + comp.options['has_diag_partials'] = True - model.add_recorder(recorder) - prob.driver.add_recorder(recorder) - - for comp in comps.values(): - comp.add_recorder(recorder) - - prob.setup(mode=mode, force_alloc_complex=force_complex) - - # we don't want ExecComps to be colored because it makes the iter counting more complicated. - for comp in model.system_iter(recurse=True, typ=ExecComp4Test): - comp.options['do_coloring'] = False - comp.options['has_diag_partials'] = True + return prob - return prob +@use_tempdirs +class TestPrePostIter(unittest.TestCase): def test_pre_post_iter_rev(self): - prob = self.setup_problem(do_pre_post_opt=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -136,7 +154,7 @@ def test_pre_post_iter_rev(self): assert_check_totals(data) def test_pre_post_iter_rev_grouped(self): - prob = self.setup_problem(do_pre_post_opt=True, group=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, group=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.G1.pre1.num_nl_solves, 1) @@ -154,7 +172,7 @@ def test_pre_post_iter_rev_grouped(self): assert_check_totals(data) def test_pre_post_iter_rev_coloring(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, coloring=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -172,7 +190,7 @@ def test_pre_post_iter_rev_coloring(self): assert_check_totals(data) def test_pre_post_iter_rev_coloring_grouped(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, group=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, coloring=True, group=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.G1.pre1.num_nl_solves, 1) @@ -190,7 +208,7 @@ def test_pre_post_iter_rev_coloring_grouped(self): assert_check_totals(data) def test_pre_post_iter_rev_ivc(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -208,7 +226,7 @@ def test_pre_post_iter_rev_ivc(self): assert_check_totals(data) def test_pre_post_iter_rev_ivc_grouped(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, group=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, group=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.G1.pre1.num_nl_solves, 1) @@ -226,7 +244,7 @@ def test_pre_post_iter_rev_ivc_grouped(self): assert_check_totals(data) def test_pre_post_iter_rev_ivc_coloring(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, coloring=True, mode='rev') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, coloring=True, mode='rev') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -244,7 +262,7 @@ def test_pre_post_iter_rev_ivc_coloring(self): assert_check_totals(data) def test_pre_post_iter_fwd(self): - prob = self.setup_problem(do_pre_post_opt=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -262,7 +280,7 @@ def test_pre_post_iter_fwd(self): assert_check_totals(data) def test_pre_post_iter_fwd_grouped(self): - prob = self.setup_problem(do_pre_post_opt=True, group=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, group=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.G1.pre1.num_nl_solves, 1) @@ -280,7 +298,7 @@ def test_pre_post_iter_fwd_grouped(self): assert_check_totals(data) def test_pre_post_iter_fwd_coloring(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, coloring=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -298,7 +316,7 @@ def test_pre_post_iter_fwd_coloring(self): assert_check_totals(data) def test_pre_post_iter_fwd_coloring_grouped(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, group=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, coloring=True, group=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.G1.pre1.num_nl_solves, 1) @@ -316,7 +334,7 @@ def test_pre_post_iter_fwd_coloring_grouped(self): assert_check_totals(data) def test_pre_post_iter_fwd_coloring_grouped_force_post(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, group=True, + prob = setup_problem(do_pre_post_opt=True, coloring=True, group=True, force=['post2'], mode='fwd') prob.run_driver() @@ -338,7 +356,7 @@ def test_pre_post_iter_fwd_coloring_grouped_force_post(self): assert_check_totals(data) def test_pre_post_iter_fwd_coloring_grouped_force_pre(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, group=True, + prob = setup_problem(do_pre_post_opt=True, coloring=True, group=True, force=['pre1'], mode='fwd') prob.run_driver() @@ -360,7 +378,7 @@ def test_pre_post_iter_fwd_coloring_grouped_force_pre(self): assert_check_totals(data) def test_pre_post_iter_fwd_ivc(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -378,7 +396,7 @@ def test_pre_post_iter_fwd_ivc(self): assert_check_totals(data) def test_pre_post_iter_fwd_ivc_coloring(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, coloring=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, coloring=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -396,7 +414,7 @@ def test_pre_post_iter_fwd_ivc_coloring(self): assert_check_totals(data) def test_pre_post_iter_approx(self): - prob = self.setup_problem(do_pre_post_opt=True, mode='fwd', approx=True, force_complex=True) + prob = setup_problem(do_pre_post_opt=True, mode='fwd', approx=True, force_complex=True) prob.run_driver() @@ -415,7 +433,7 @@ def test_pre_post_iter_approx(self): assert_check_totals(data) def test_pre_post_iter_approx_grouped(self): - prob = self.setup_problem(do_pre_post_opt=True, group=True, approx=True, force_complex=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, group=True, approx=True, force_complex=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.G1.pre1.num_nl_solves, 1) @@ -433,7 +451,7 @@ def test_pre_post_iter_approx_grouped(self): assert_check_totals(data) def test_pre_post_iter_approx_coloring(self): - prob = self.setup_problem(do_pre_post_opt=True, coloring=True, approx=True, force_complex=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, coloring=True, approx=True, force_complex=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -451,7 +469,7 @@ def test_pre_post_iter_approx_coloring(self): assert_check_totals(data) def test_pre_post_iter_approx_ivc(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, approx=True, force_complex=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, approx=True, force_complex=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -469,7 +487,7 @@ def test_pre_post_iter_approx_ivc(self): assert_check_totals(data) def test_pre_post_iter_approx_ivc_coloring(self): - prob = self.setup_problem(do_pre_post_opt=True, use_ivc=True, coloring=True, approx=True, force_complex=True, mode='fwd') + prob = setup_problem(do_pre_post_opt=True, use_ivc=True, coloring=True, approx=True, force_complex=True, mode='fwd') prob.run_driver() self.assertEqual(prob.model.pre1.num_nl_solves, 1) @@ -528,7 +546,7 @@ def test_newton_with_densejac_under_full_model_fd(self): assert_near_equal(J[('obj_cmp.obj', 'pz.z')], np.array([[9.62568658, 1.78576699]]), .00001) def test_reading_system_cases_pre_opt_post(self): - prob = self.setup_problem(do_pre_post_opt=True, mode='fwd', recording=True) + prob = setup_problem(do_pre_post_opt=True, mode='fwd', recording=True) prob.run_driver() prob.cleanup() @@ -646,5 +664,16 @@ def test_comp_multiple_iter_lists(self): self.assertEqual(C4._run_on_opt, [False, True, False]) self.assertEqual(C5._run_on_opt, [False, False, True]) + +@unittest.skipUnless(MPI and PETScVector, "MPI and PETSc are required.") +class PrePostMPITestCase(unittest.TestCase): + N_PROCS = 2 + + def test_newton_on_one_rank(self): + prob = setup_problem(do_pre_post_opt=True, mode='fwd', parallel=True, group=True) + + prob.run_driver() + + if __name__ == "__main__": unittest.main()