"""Augmented Lagrangian for scalar valued objectives."""
from scipy.optimize import BFGS, NonlinearConstraint, OptimizeResult
from desc.backend import jnp
from desc.utils import errorif, setdefault
from .bound_utils import (
cl_scaling_vector,
find_active_constraints,
in_bounds,
make_strictly_feasible,
select_step,
)
from .tr_subproblems import (
solve_trust_region_2d_subspace,
solve_trust_region_dogleg,
trust_region_step_exact_cho,
update_tr_radius,
)
from .utils import (
STATUS_MESSAGES,
check_termination,
compute_hess_scale,
inequality_to_bounds,
print_header_nonlinear,
print_iteration_nonlinear,
)
def fmin_auglag(  # noqa: C901 - FIXME: simplify this
    fun,
    x0,
    grad,
    hess="bfgs",
    bounds=(-jnp.inf, jnp.inf),
    constraint=None,
    args=(),
    x_scale=1,
    ftol=1e-6,
    xtol=1e-6,
    gtol=1e-6,
    ctol=1e-6,
    verbose=1,
    maxiter=None,
    callback=None,
    options=None,
):
"""Minimize a function with constraints using an augmented Lagrangian method.
Parameters
----------
fun : callable
objective to be minimized. Should have a signature like fun(x,*args)-> float
x0 : array-like
initial guess
grad : callable
function to compute gradient, df/dx. Should take the same arguments as fun
hess : callable
function to compute Hessian matrix of fun
bounds : tuple of array-like
Lower and upper bounds on independent variables. Defaults to no bounds.
Each array must match the size of x0 or be a scalar, in the latter case a
bound will be the same for all variables. Use np.inf with an appropriate sign
to disable bounds on all or some variables.
constraint : scipy.optimize.NonlinearConstraint
constraint to be satisfied
args : tuple
additional arguments passed to fun, grad, and hess
x_scale : array_like or ``'hess'``, optional
Characteristic scale of each variable. Setting ``x_scale`` is equivalent
to reformulating the problem in scaled variables ``xs = x / x_scale``.
An alternative view is that the size of a trust region along jth
dimension is proportional to ``x_scale[j]``. Improved convergence may
be achieved by setting ``x_scale`` such that a step of a given size
along any of the scaled variables has a similar effect on the cost
function. If set to ``'hess'``, the scale is iteratively updated using the
inverse norms of the columns of the Hessian matrix.
ftol : float or None, optional
Tolerance for termination by the change of the cost function.
The optimization process is stopped when ``dF < ftol * F``,
and there was an adequate agreement between a local quadratic model and
the true model in the last step. If None, the termination by this
condition is disabled.
xtol : float or None, optional
Tolerance for termination by the change of the independent variables.
Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``.
If None, the termination by this condition is disabled.
gtol : float or None, optional
Absolute tolerance for termination by the norm of the gradient.
Optimizer terminates when ``max(abs(g)) < gtol``.
If None, the termination by this condition is disabled.
ctol : float, optional
Tolerance for stopping based on infinity norm of the constraint violation.
Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more
of the other tolerances are met (``ftol``, ``xtol``, ``gtol``)
verbose : {0, 1, 2}, optional
* 0 : work silently.
* 1 (default) : display a termination report.
* 2 : display progress during iterations
maxiter : int, optional
maximum number of iterations. Defaults to size(x)*100
callback : callable, optional
Called after each iteration. Should be a callable with
the signature:
``callback(xk, *args) -> bool``
where ``xk`` is the current parameter vector, and ``args``
are the same arguments passed to fun and grad. If callback returns True
the algorithm execution is terminated.
options : dict, optional
dictionary of optional keyword arguments to override default solver settings.
- ``"initial_penalty_parameter"`` : (float or array-like) Initial value for the
quadratic penalty parameter. May be array like, in which case it should be the
same length as the number of constraint residuals. Default 10.
- ``"initial_multipliers"`` : (float or array-like or ``"least_squares"``)
Initial Lagrange multipliers. May be array like, in which case it should be
the same length as the number of constraint residuals. If ``"least_squares"``,
uses an estimate based on the least squares solution of the optimality
conditions, see ch 14 of [1]_. Default 0.
- ``"omega"`` : (float) Hyperparameter for determining initial gradient
tolerance. See algorithm 14.4.2 from [1]_ for details. Default 1.0
- ``"eta"`` : (float) Hyperparameter for determining initial constraint
tolerance. See algorithm 14.4.2 from [1]_ for details. Default 1.0
- ``"alpha_omega"`` : (float) Hyperparameter for updating gradient tolerance.
See algorithm 14.4.2 from [1]_ for details. Default 1.0
- ``"beta_omega"`` : (float) Hyperparameter for updating gradient tolerance.
See algorithm 14.4.2 from [1]_ for details. Default 1.0
- ``"alpha_eta"`` : (float) Hyperparameter for updating constraint tolerance.
See algorithm 14.4.2 from [1]_ for details. Default 0.1
- ``"beta_eta"`` : (float) Hyperparameter for updating constraint tolerance.
See algorithm 14.4.2 from [1]_ for details. Default 0.9
- ``"tau"`` : (float) Factor to increase penalty parameter by when constraint
violation doesn't decrease sufficiently. Default 10
- ``"max_nfev"`` : (int > 0) Maximum number of function evaluations (each
iteration may take more than one function evaluation). Default is
``5*maxiter+1``
- ``"max_dx"`` : (float > 0) Maximum allowed change in the norm of x from its
starting point. Default np.inf.
- ``"initial_trust_radius"`` : (``"scipy"``, ``"conngould"``, ``"mix"`` or
float > 0) Initial trust region radius. ``"scipy"`` uses the scaled norm of
x0, which is the default behavior in ``scipy.optimize.least_squares``.
``"conngould"`` uses the norm of the Cauchy point, as recommended in ch17
of [1]_. ``"mix"`` uses the geometric mean of the previous two options. A
float can also be passed to specify the trust radius directly.
Default is ``"scipy"``.
- ``"initial_trust_ratio"`` : (float > 0) A extra scaling factor that is
applied after one of the previous heuristics to determine the initial trust
radius. Default 1.
- ``"max_trust_radius"`` : (float > 0) Maximum allowable trust region radius.
Default ``np.inf``.
- ``"min_trust_radius"`` : (float >= 0) Minimum allowable trust region radius.
Optimization is terminated if the trust region falls below this value.
Default ``np.finfo(x0.dtype).eps``.
- ``"tr_increase_threshold"`` : (0 < float < 1) Increase the trust region
radius when the ratio of actual to predicted reduction exceeds this threshold.
Default 0.75.
- ``"tr_decrease_threshold"`` : (0 < float < 1) Decrease the trust region
radius when the ratio of actual to predicted reduction is less than this
threshold. Default 0.25.
- ``"tr_increase_ratio"`` : (float > 1) Factor to increase the trust region
radius by when the ratio of actual to predicted reduction exceeds threshold.
Default 2.
- ``"tr_decrease_ratio"`` : (0 < float < 1) Factor to decrease the trust region
radius by when the ratio of actual to predicted reduction falls below
threshold. Default 0.25.
- ``"tr_method"`` : (``"exact"``, ``"dogleg"``, ``"subspace"``) Method to use
for trust region subproblem. ``"exact"`` uses a series of cholesky
factorizations (usually 2-3) to find the optimal step. ``"dogleg"``
approximates the optimal step using Powell's dogleg method. ``"subspace"``
solves a reduced subproblem over the space spanned by the gradient and Newton
direction. Default ``"exact"``
- ``"hessian_exception_strategy"`` : (``"skip_update"``, ``"damp_update"``)
If BFGS is used, defines how to proceed when the curvature condition is
violated. Set it to 'skip_update' to just skip the update. Or, alternatively,
set it to 'damp_update' to interpolate between the actual BFGS
result and the unmodified matrix. Both exceptions strategies
are explained in [2]_, p.536-537. Default is ``"damp_update"``.
- ``"hessian_min_curvature"`` : (float) If BFGS is used, this number, scaled by
a normalization factor, defines the minimum curvature
``dot(delta_grad, delta_x)`` allowed to go unaffected by the exception
strategy. By default is equal to 1e-8 when ``exception_strategy =
"skip_update"`` and equal to 0.2 when ``exception_strategy = "damp_update"``.
- ``"hessian_init_scale"`` : (float, ``"auto"``) If BFGS is used, the matrix
scale at first iteration. At the first iteration the Hessian matrix or its
inverse will be initialized with ``init_scale*np.eye(n)``, where ``n`` is the
problem dimension. Set it to ``"auto"`` in order to use an automatic heuristic
for choosing the initial scale. The heuristic is described in [2]_, p.143.
By default uses ``"auto"``.
Returns
-------
res : OptimizeResult
The optimization result represented as a ``OptimizeResult`` object.
Important attributes are: ``x`` the solution array, ``success`` a
Boolean flag indicating if the optimizer exited successfully.
References
----------
.. [1] Conn, Andrew, and Gould, Nicholas, and Toint, Philippe. "Trust-region
methods" (2000).
.. [2] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
Second Edition (2006).
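
    Examples
    --------
    A minimal sketch of typical usage on a hypothetical toy problem (derivatives
    here via ``jax`` for convenience; any callables with the documented
    signatures work):

    >>> import jax
    >>> import jax.numpy as jnp
    >>> from scipy.optimize import NonlinearConstraint
    >>> def fun(x):
    ...     return (x[0] - 1) ** 2 + x[1] ** 2  # objective
    >>> def cfun(x):
    ...     return jnp.atleast_1d(x[0] + x[1] - 1)  # want x0 + x1 == 1
    >>> constraint = NonlinearConstraint(cfun, 0.0, 0.0, jac=jax.jacfwd(cfun))
    >>> res = fmin_auglag(
    ...     fun, jnp.array([2.0, 2.0]), jax.grad(fun), constraint=constraint,
    ...     verbose=0,
    ... )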
"""
    # copy so we don't mutate the caller's dict, since settings are popped below
    options = {} if options is None else dict(options)
    constraint = setdefault(
        constraint,
        NonlinearConstraint(  # create a dummy constraint
            fun=lambda x, *args: jnp.array([0.0]),
            lb=0.0,
            ub=0.0,
            jac=lambda x, *args: jnp.zeros((1, x.size)),
        ),
    )
    (
        z0,
        fun_wrapped,
        grad_wrapped,
        hess_wrapped,
        constraint_wrapped,
        zbounds,
        z2xs,
    ) = inequality_to_bounds(
        x0,
        fun,
        grad,
        hess,
        constraint,
        bounds,
        *args,
    )
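
    # Augmented Lagrangian with this module's sign convention (our mu is the
    # inverse of the mu in Conn & Gould):
    #   L(z, y, mu) = f(z) - y^T c(z) + sum_i (mu_i / 2) c_i(z)^2
    # where c are the equality constraint residuals (inequalities having been
    # converted to bounds above), y the multipliers, mu the penalty parameters.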
    def lagfun(f, c, y, mu, *args):
        return f - jnp.dot(y, c) + jnp.sum(mu / 2 * c * c)
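
    # Gradient of the augmented Lagrangian, grad L = grad f - J^T (y - mu * c),
    # computed matrix-free via the constraint vector-Jacobian product.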
    def laggrad(z, y, mu, *args):
        c = constraint_wrapped.fun(z, *args)
        yJ = constraint_wrapped.vjp(y - mu * c, z, *args)
        return grad_wrapped(z, *args) - yJ
    if isinstance(hess_wrapped, str) and hess_wrapped.lower() == "bfgs":
        bfgs = True
        hess_init_scale = options.pop("hessian_init_scale", "auto")
        hess_exception_strategy = options.pop(
            "hessian_exception_strategy", "damp_update"
        )
        hess_min_curvature = options.pop("hessian_minimum_curvature", None)
        hess_wrapped = BFGS(
            hess_exception_strategy, hess_min_curvature, hess_init_scale
        )
    if isinstance(hess_wrapped, BFGS):
        bfgs = True
        if hasattr(hess_wrapped, "n"):  # assume it's already been initialized
            assert hess_wrapped.approx_type == "hess"
            assert hess_wrapped.n == z0.size
        else:
            hess_wrapped.initialize(z0.size, "hess")
        laghess = hess_wrapped
    elif callable(constraint_wrapped.hess) and callable(hess_wrapped):
        bfgs = False

        # full Hessian of the augmented Lagrangian:
        # H = Hf - sum_i y_i Hc_i + sum_i mu_i c_i Hc_i + J^T diag(mu) J
        def laghess(z, y, mu, *args):
            c = constraint_wrapped.fun(z, *args)
            Hf = hess_wrapped(z, *args)
            Jc = constraint_wrapped.jac(z, *args)
            Hc1 = constraint_wrapped.hess(z, y)
            Hc2 = constraint_wrapped.hess(z, c * mu)
            return Hf - Hc1 + Hc2 + jnp.dot(mu * Jc.T, Jc)

    elif callable(hess_wrapped):
        bfgs = False

        def laghess(z, y, mu, *args):
            H = hess_wrapped(z, *args)
            J = constraint_wrapped.jac(z, *args)
            # ignoring higher order derivatives of constraints for now
            return H + jnp.dot(mu * J.T, J)

    else:
        laghess = None
    errorif(
        not (isinstance(laghess, BFGS) or callable(laghess)),
        ValueError,
        "hess should either be a callable or 'bfgs'",
    )
    nfev = 0
    ngev = 0
    nhev = 0
    iteration = 0

    z = z0.copy()
    f = fun_wrapped(z, *args)
    c = constraint_wrapped.fun(z, *args)
    constr_violation = jnp.linalg.norm(c, ord=jnp.inf)
    nfev += 1

    lb, ub = zbounds
    bounded = jnp.any(lb != -jnp.inf) | jnp.any(ub != jnp.inf)
    assert in_bounds(z, lb, ub), "x0 is infeasible"
    z = make_strictly_feasible(z, lb, ub)

    mu = options.pop("initial_penalty_parameter", 10 * jnp.ones_like(c))
    y = options.pop("initial_multipliers", jnp.zeros_like(c))
    if isinstance(y, str) and y == "least_squares":
        # least squares multiplier estimate: y solves min ||J^T y - g||,
        # from the first order optimality conditions
        _J = constraint_wrapped.jac(z, *args)
        _g = grad_wrapped(z, *args)
        y = jnp.linalg.lstsq(_J.T, _g)[0]
        y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0)
    y, mu, c = jnp.broadcast_arrays(y, mu, c)
    # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1
    omega = options.pop("omega", 1.0)
    eta = options.pop("eta", 1.0)
    alpha_omega = options.pop("alpha_omega", 1.0)
    beta_omega = options.pop("beta_omega", 1.0)
    alpha_eta = options.pop("alpha_eta", 0.1)
    beta_eta = options.pop("beta_eta", 0.9)
    tau = options.pop("tau", 10)

    gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol)
    ctolk = max(eta / jnp.mean(mu) ** alpha_eta, ctol)

    L = lagfun(f, c, y, mu)
    g = laggrad(z, y, mu, *args)
    ngev += 1
    if bfgs:
        H = laghess.get_matrix()
    else:
        H = laghess(z, y, mu, *args)
        nhev += 1

    maxiter = setdefault(maxiter, z.size * 100)
    max_nfev = options.pop("max_nfev", 5 * maxiter + 1)
    max_dx = options.pop("max_dx", jnp.inf)

    hess_scale = isinstance(x_scale, str) and x_scale in ["hess", "auto"]
    if hess_scale:
        scale, scale_inv = compute_hess_scale(H)
    else:
        x_scale = jnp.broadcast_to(x_scale, z.shape)
        scale, scale_inv = x_scale, 1 / x_scale
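
    # Coleman-Li affine scaling for the bound constraints: v -> 0 as a variable
    # approaches an active bound, which damps steps into the boundary. The
    # scaled quantities below are d = sqrt(v) * scale, g_h = d * g, and
    # H_h = diag(d) H diag(d).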
    v, dv = cl_scaling_vector(z, g, lb, ub)
    v = jnp.where(dv != 0, v * scale_inv, v)
    d = v**0.5 * scale
    diag_h = g * dv * scale
    g_h = g * d
    H_h = d * H * d[:, None]
    g_norm = jnp.linalg.norm(g * v, ord=jnp.inf)

    # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould
    # scipy : norm of the scaled x, as used in scipy
    # mix : geometric mean of conngould and scipy
    init_tr = {
        "scipy": jnp.linalg.norm(z * scale_inv / v**0.5),
        "conngould": (g_h @ g_h) / abs(g_h @ H_h @ g_h),
        "mix": jnp.sqrt(
            (g_h @ g_h)
            / abs(g_h @ H_h @ g_h)
            * jnp.linalg.norm(z * scale_inv / v**0.5)
        ),
    }
    trust_radius = options.pop("initial_trust_radius", "conngould")
    tr_ratio = options.pop("initial_trust_ratio", 1.0)
    trust_radius = init_tr.get(trust_radius, trust_radius)
    trust_radius *= tr_ratio
    trust_radius = trust_radius if (trust_radius > 0) else 1.0

    max_trust_radius = options.pop("max_trust_radius", jnp.inf)
    min_trust_radius = options.pop("min_trust_radius", jnp.finfo(z.dtype).eps)
    tr_increase_threshold = options.pop("tr_increase_threshold", 0.75)
    tr_decrease_threshold = options.pop("tr_decrease_threshold", 0.5)
    tr_increase_ratio = options.pop("tr_increase_ratio", 4)
    tr_decrease_ratio = options.pop("tr_decrease_ratio", 0.25)
    tr_method = options.pop("tr_method", "exact")

    errorif(
        len(options) > 0,
        ValueError,
        "Unknown options: {}".format([key for key in options]),
    )
    callback = setdefault(callback, lambda *args: False)

    methods = {
        "dogleg": solve_trust_region_dogleg,
        "subspace": solve_trust_region_2d_subspace,
        "exact": trust_region_step_exact_cho,
    }
    errorif(
        tr_method not in methods,
        ValueError,
        f"tr_method should be one of {list(methods.keys())}, got {tr_method}",
    )
    subproblem = methods[tr_method]

    z_norm = jnp.linalg.norm(z, ord=2)
    success = None
    message = None
    step_norm = jnp.inf
    actual_reduction = jnp.inf
    Lactual_reduction = jnp.inf
    alpha = 0  # "Levenberg-Marquardt" parameter

    allx = [z]
    alltr = [trust_radius]

    if g_norm < gtol and constr_violation < ctol:
        success, message = True, STATUS_MESSAGES["gtol"]

    if verbose > 1:
        print_header_nonlinear(True, "Penalty param", "max(|mltplr|)")
        print_iteration_nonlinear(
            iteration,
            nfev,
            f,
            actual_reduction,
            step_norm,
            g_norm,
            constr_violation,
            jnp.mean(mu),
            jnp.max(jnp.abs(y)),
        )
    while iteration < maxiter and success is None:
        H_a = H_h + jnp.diag(diag_h) if bounded else H_h

        actual_reduction = -1
        Lactual_reduction = -1
        # theta controls how far we step back from the bounds
        theta = max(0.995, 1 - g_norm)

        while Lactual_reduction <= 0 and nfev <= max_nfev:
            # Solve the subproblem. This gives us the proposed step relative to
            # the current position, and whether the proposed step has reached
            # the trust region boundary or not.
            step_h, hits_boundary, alpha = subproblem(g_h, H_a, trust_radius, alpha)
            step = d * step_h  # Trust-region solution in the original space.
            step, step_h, Lpredicted_reduction = select_step(
                z,
                H_h,
                diag_h,
                g_h,
                step,
                step_h,
                d,
                trust_radius,
                lb,
                ub,
                theta,
                mode="hess",
            )

            # calculate actual reduction and step norm
            step_h_norm = jnp.linalg.norm(step_h, ord=2)
            step_norm = jnp.linalg.norm(step, ord=2)
            z_new = make_strictly_feasible(z + step, lb, ub, rstep=0)
            f_new = fun_wrapped(z_new, *args)
            c_new = constraint_wrapped.fun(z_new, *args)
            L_new = lagfun(f_new, c_new, y, mu)
            nfev += 1
            actual_reduction = f - f_new
            Lactual_reduction = L - L_new

            # update the trust radius according to the actual/predicted ratio
            tr_old = trust_radius
            trust_radius, Lreduction_ratio = update_tr_radius(
                trust_radius,
                Lactual_reduction,
                Lpredicted_reduction,
                step_h_norm,
                hits_boundary,
                max_trust_radius,
                tr_increase_threshold,
                tr_increase_ratio,
                tr_decrease_threshold,
                tr_decrease_ratio,
            )
            alltr.append(trust_radius)
            alpha *= tr_old / trust_radius

            success, message = check_termination(
                actual_reduction,
                f,
                step_norm,
                z_norm,
                g_norm,
                Lreduction_ratio,
                ftol,
                xtol,
                gtol,
                iteration,
                maxiter,
                nfev,
                max_nfev,
                min_trust_radius=min_trust_radius,
                dx_total=jnp.linalg.norm(z - z0),
                max_dx=max_dx,
                constr_violation=constr_violation,
                ctol=ctol,
            )
            if success is not None:
                break
        # if the augmented Lagrangian was reduced, accept the step
        if Lactual_reduction > 0:
            z_old = z
            z = z_new
            allx.append(z)
            f = f_new
            c = c_new
            constr_violation = jnp.linalg.norm(c, ord=jnp.inf)
            L = L_new
            g_old = g
            g = laggrad(z, y, mu, *args)
            ngev += 1
            if bfgs:
                laghess.update(z - z_old, g - g_old)
                H = laghess.get_matrix()
            else:
                H = laghess(z, y, mu, *args)
                nhev += 1
            if hess_scale:
                scale, scale_inv = compute_hess_scale(H)
            v, dv = cl_scaling_vector(z, g, lb, ub)
            v = jnp.where(dv != 0, v * scale_inv, v)
            g_norm = jnp.linalg.norm(g * v, ord=jnp.inf)

            # update the augmented Lagrangian parameters: where a constraint is
            # nearly satisfied, take a first order multiplier update
            # y <- y - mu*c; elsewhere increase the penalty, mu <- tau*mu
            if g_norm < gtolk:  # TODO: maybe also add ftolk, xtolk?
                y = jnp.where(jnp.abs(c) < ctolk, y - mu * c, y)
                mu = jnp.where(jnp.abs(c) >= ctolk, tau * mu, mu)
                if constr_violation < ctolk:
                    ctolk = max(ctolk / (jnp.mean(mu) ** beta_eta), ctol)
                    gtolk = max(gtolk / (jnp.mean(mu) ** beta_omega), gtol)
                else:
                    ctolk = max(eta / (jnp.mean(mu) ** alpha_eta), ctol)
                    gtolk = max(omega / (jnp.mean(mu) ** alpha_omega), gtol)

                # after updating the Lagrangian params we need to recompute L,
                # its gradient, and the Hessian
                L = lagfun(f, c, y, mu)
                g = laggrad(z, y, mu, *args)
                ngev += 1
                if bfgs:
                    laghess.update(z - z_old, g - g_old)
                    H = laghess.get_matrix()
                else:
                    H = laghess(z, y, mu, *args)
                    nhev += 1
                if hess_scale:
                    scale, scale_inv = compute_hess_scale(H, scale_inv)
                v, dv = cl_scaling_vector(z, g, lb, ub)
                v = jnp.where(dv != 0, v * scale_inv, v)
                g_norm = jnp.linalg.norm(g * v, ord=jnp.inf)

            z_norm = jnp.linalg.norm(z, ord=2)
            d = v**0.5 * scale
            diag_h = g * dv * scale
            g_h = g * d
            H_h = d * H * d[:, None]

            if g_norm < gtol and constr_violation < ctol:
                success, message = True, STATUS_MESSAGES["gtol"]
            if callback(jnp.copy(z2xs(z)[0]), *args):
                success, message = False, STATUS_MESSAGES["callback"]
        else:
            step_norm = actual_reduction = 0
        iteration += 1
        if verbose > 1:
            print_iteration_nonlinear(
                iteration,
                nfev,
                f,
                actual_reduction,
                step_norm,
                g_norm,
                constr_violation,
                jnp.mean(mu),
                jnp.max(jnp.abs(y)),
            )
        if g_norm < gtol and constr_violation < ctol:
            success, message = True, STATUS_MESSAGES["gtol"]

    if (iteration == maxiter) and success is None:
        success, message = False, STATUS_MESSAGES["maxiter"]
    x, s = z2xs(z)
    active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol)
    result = OptimizeResult(
        x=x,
        s=s,
        y=y,
        penalty_param=mu,
        success=success,
        fun=f,
        grad=g,
        v=v,
        optimality=g_norm,
        nfev=nfev,
        ngev=ngev,
        nhev=nhev,
        nit=iteration,
        message=message,
        active_mask=active_mask,
        constr_violation=constr_violation,
    )
    if verbose > 0:
        if result["success"]:
            print(result["message"])
        else:
            print("Warning: " + result["message"])
        print(f"         Current function value: {result['fun']:.3e}")
        print(f"         Constraint violation: {result['constr_violation']:.3e}")
        print(f"         Total delta_x: {jnp.linalg.norm(x0 - result['x']):.3e}")
        print(f"         Iterations: {result['nit']:d}")
        print(f"         Function evaluations: {result['nfev']:d}")
        print(f"         Gradient evaluations: {result['ngev']:d}")
        print(f"         Hessian evaluations: {result['nhev']:d}")
    result["allx"] = [z2xs(x)[0] for x in allx]
    return result