From b8ce32d6b3518e9e7a25189b0dabf21accf885f6 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 14 Oct 2024 17:47:13 +0200 Subject: [PATCH 01/11] init AL method: model and alg --- examples/demo-cutest.jl | 16 +++ src/AL_alg.jl | 247 +++++++++++++++++++++++++++++++++ src/AL_model.jl | 179 ++++++++++++++++++++++++ src/RegularizedOptimization.jl | 3 + 4 files changed, 445 insertions(+) create mode 100644 examples/demo-cutest.jl create mode 100644 src/AL_alg.jl create mode 100644 src/AL_model.jl diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl new file mode 100644 index 00000000..7cbd38d6 --- /dev/null +++ b/examples/demo-cutest.jl @@ -0,0 +1,16 @@ +using NLPModels, CUTEst +using ProximalOperators +using RegularizedOptimization + +problem_name = "HS8" +nlp = CUTEstModel(problem_name) +@assert !has_bounds(nlp) +@assert equality_constrained(nlp) + +h = NormL1(1.0) + +options = ROSolverOptions(ϵa = 1e-6, ϵr = 1e-6, verbose = 2) + +stats = AL(nlp, h, options) + +finalize(nlp) \ No newline at end of file diff --git a/src/AL_alg.jl b/src/AL_alg.jl new file mode 100644 index 00000000..f397c92b --- /dev/null +++ b/src/AL_alg.jl @@ -0,0 +1,247 @@ +export AL + +function AL(nlp::AbstractNLPModel, h::H, options::ROSolverOptions; kwargs...) where {H} + if unconstrained(nlp) || bound_constrained(nlp) + return AL(Val(:unc), nlp, h, options; kwargs...) + elseif equality_constrained(nlp) + return AL(Val(:equ), nlp, h, options; kwargs...) + else # has inequalities + return AL(Val(:ineq), nlp, h, options; kwargs...) + end +end + +function AL( + ::Val{:unc}, + nlp::AbstractNLPModel, + h::H, + options::ROSolverOptions; + subsolver = has_bounds(nlp) ? TRDH : PG, + kwargs..., +) where {H} + if !(unconstrained(nlp) || bound_constrained(nlp)) + error( + "AL(::Val{:unc}, ...) should only be called for unconstrained or bound-constrained problems. Use AL(...)", + ) + end + @warn "Problem does not have general explicit constraints; calling solver $(string(subsolver))" + return subsolver(nlp, h, options; kwargs...) +end + +function AL( + ::Val{:ineq}, + nlp::AbstractNLPModel, + h::H, + options::ROSolverOptions{R}; + x0::AbstractVector{R} = nlp.meta.x0, + kwargs..., +) where {H, R} + if nlp.meta.ncon == 0 || equality_constrained(nlp) + error("AL(::Val{:ineq}, ...) should only be called for problems with inequalities. Use AL(...)") + end + snlp = nlp isa AbstractNLSModel ? SlackNLSModel(nlp) : SlackModel(nlp) + if length(x) != snlp.meta.nvar + x0s = fill!(V(undef, snlp.meta.nvar), zero(eltype(V))) + x0s[1:(nlp.meta.nvar)] .= x0 + else + x0s = x0 + end + output = AL(Val(:equ), snlp, h, options; x0 = x0s, kwargs...) + output.solution = output.solution[1:(nlp.meta.nvar)] + return output +end + +""" + AL(nlp, h, options; kwargs...) + +An augmented Lagrangian method for the problem + + min f(x) + h(x) subject to lvar ≤ x ≤ uvar, lcon ≤ c(x) ≤ ucon + +where f: ℝⁿ → ℝ, c: ℝⁿ → ℝᵐ and their derivatives are Lipschitz continuous and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. + +At each iteration, a step s is computed as an approximate solution of + + min ½ ‖J(x) s + F(x)‖² + ½ σ ‖s‖² + ψ(s; x) + +where F(x) and J(x) are the residual and its Jacobian at x, respectively, ψ(s; x) = h(x + s), +and σ > 0 is a regularization parameter. + +### Arguments + +* `nlp::AbstractNLPModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `options::ROSolverOptions`: a structure containing algorithmic parameters + +The objective and gradient of `nlp` will be accessed. +The Hessian of `nlp` may be accessed or not, depending on the subsolver adopted. +If adopted, the Hessian is accessed as an abstract operator and need not be the exact Hessian. + +### Keyword arguments + +* `x0::AbstractVector`: a primal initial guess (default: `nlp.meta.x0`) +* `y0::AbstractVector`: a dual initial guess (default: `nlp.meta.y0`) +* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`) +* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver +* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver. + +### Return values + +* `stats::GenericExecutionStats`: solution and other info. + +""" +function AL( + ::Val{:equ}, + nlp::AbstractNLPModel, + h::H, + options::ROSolverOptions{T}; + x0::AbstractVector{T} = nlp.meta.x0, + y0::AbstractVector{T} = nlp.meta.y0, + subsolver = has_bounds(nlp) ? TRDH : R2, + subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), + subsolver_options::ROSolverOptions{T} = ROSolverOptions{T}(), + init_penalty::Real = T(10), + factor_penalty_up::Real = T(2), + ctol::Real = options.ϵa > 0 ? options.ϵa : options.ϵr, + init_subtol::Real = T(0.1), + factor_primal_linear_improvement::Real = T(3 // 4), + factor_decrease_subtol::Real = T(1 // 4), +) where {H, T <: Real} + if !(nlp.meta.minimize) + error("AL only works for minimization problems") + end + if nlp.meta.ncon == 0 || !equality_constrained(nlp) + error( + "AL(::Val{:equ}, ...) should only be called for equality-constrained problems. Use AL(...)", + ) + end + + stats = GenericExecutionStats(nlp) + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + + # parameters + @assert init_penalty > 0 + @assert factor_penalty_up > 1 + @assert 0 < factor_primal_linear_improvement < 1 + @assert 0 < factor_decrease_subtol < 1 + ymin = -1e20 + ymax = 1e20 + @assert ymin <= 0 + @assert ymax >= 0 + verbose = options.verbose + max_time = options.maxTime + max_iter = options.maxIter + + # initialization + @assert length(x0) == nlp.meta.nvar + @assert length(y0) == nlp.meta.ncon + x = similar(nlp.meta.x0) + y = similar(nlp.meta.y0) + x .= x0 + y .= y0 + set_solution!(stats, x) + set_constraint_multipliers!(stats, y) + + fx, cx = objcons(nlp, x) + mu = init_penalty + alf = AugLagModel(nlp, y, mu, x, fx, cx) + + V = norm(alf.cx, Inf) + V_old = Inf + iter = 0 + subiters = 0 + done = false + + suboptions = subsolver_options + subtol = init_subtol + + if verbose > 0 + @info log_header( + [:iter, :subiter, :fx, :prim_res, :μ, :normy, :dual_tol, :inner_status], + [Int, Int, Float64, Float64, Float64, Float64, Float64, Symbol], + ) + @info log_row(Any[iter, subiters, fx, V, alf.μ, norm(y), subtol]) + end + + while !done + iter += 1 + + # dual safeguard + project_y!(alf, ymin, ymax) + + # AL subproblem + suboptions.ϵa = max(subtol, options.ϵa) + suboptions.ϵr = max(subtol, options.ϵr) + subout = with_logger(subsolver_logger) do + subsolver(alf, h, suboptions, x0 = x) + end + x .= subout.solution + subiters += subout.iter + + # objective + fx = obj(nlp, x) + set_objective!(stats, fx) + + # dual estimate + update_y!(alf) + set_constraint_multipliers!(stats, alf.y) + + # stationarity measure + if subout.dual_residual_reliable + set_dual_residual!(stats, subout.dual_feas) + end + + # primal violation + V = norm(alf.cx, Inf) + set_primal_residual!(stats, V) + + # termination checks + dual_ok = + subout.status_reliable && + subout.status == :first_order && + suboptions.ϵa <= options.ϵa && + suboptions.ϵr <= options.ϵr + primal_ok = V <= ctol + optimal = dual_ok && primal_ok + + set_iter!(stats, iter) + set_time!(stats, time() - start_time) + set_status!( + stats, + SolverCore.get_status( + nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = optimal, + infeasible = false, + parameter_too_large = false, + unbounded = false, + stalled = false, + exception = false, + max_time = max_time, + max_iter = max_iter, + ), + ) + + done = stats.status != :unknown + + if verbose > 0 && (mod(stats.iter, verbose) == 0 || done) + @info log_row(Any[iter, subiters, fx, V, alf.μ, norm(alf.y), subtol, subout.status]) + end + + if !done + if V > max(ctol, factor_primal_linear_improvement * V_old) + #@info "decreasing mu" + mu *= factor_penalty_up + end + update_μ!(alf, mu) + V_old = V + subtol *= factor_decrease_subtol + end + end + set_solution!(stats, x) + set_constraint_multipliers!(stats, alf.y) + return stats +end \ No newline at end of file diff --git a/src/AL_model.jl b/src/AL_model.jl new file mode 100644 index 00000000..f0a061e6 --- /dev/null +++ b/src/AL_model.jl @@ -0,0 +1,179 @@ +# based on Percival.jl +# https://github.com/JuliaSmoothOptimizers/Percival.jl/blob/main/src/AugLagModel.jl + +export AugLagModel +export update_cx!, update_y!, project_y!, update_μ! + +using NLPModels, NLPModelsModifiers, LinearAlgebra, LinearOperators +using NLPModels: increment!, @lencheck + +@doc raw""" + AugLagModel(model, y, μ, x, fx, cx) + +Given a model +```math +\min \ f(x) \quad s.t. \quad c(x) = 0, \quad l ≤ x ≤ u, +``` +this new model represents the subproblem of the augmented Lagrangian method +```math +\min \ f(x) - yᵀc(x) + \tfrac{1}{2} μ \|c(x)\|^2 \quad s.t. \quad l ≤ x ≤ u, +``` +where y is an estimates of the Lagrange multiplier vector and μ is the penalty parameter. + +In addition to keeping `meta` and `counters` as any NLPModel, an AugLagModel also stores +- `model`: The internal model defining ``f``, ``c`` and the bounds, +- `y`: The multipliers estimate, +- `μ`: The penalty parameter, +- `x`: Reference to the last point at which the function `c(x)` was computed, +- `fx`: Reference to `f(x)`, +- `cx`: Reference to `c(x)`, +- `μc_y`: storage for y - μ * cx, +- `Jtv`: storage for jtprod(nlp, x, v). + +Use the functions `update_cx!`, `update_y!` and `update_μ!` to update these values. +""" +mutable struct AugLagModel{M <: AbstractNLPModel, T <: AbstractFloat, V <: AbstractVector} <: + AbstractNLPModel{T, V} + meta::NLPModelMeta{T, V} + counters::Counters + model::M + y::V + μ::T + x::V + fx::T + cx::V + μc_y::V # y - μ * cx + Jtv::Vector{T} +end + +function AugLagModel(model::AbstractNLPModel{T, V}, y::V, μ::T, x::V, fx::T, cx::V) where {T, V} + nvar, ncon = model.meta.nvar, model.meta.ncon + @assert length(x) == nvar + @assert length(y) == ncon + @assert length(cx) == ncon + μ >= 0 || error("Penalty parameter μ should be nonnegative") + + meta = NLPModelMeta( + nvar, + x0 = model.meta.x0, + lvar = model.meta.lvar, + uvar = model.meta.uvar, + name = "AugLagModel-$(model.meta.name)", + ) + + return AugLagModel( + meta, + Counters(), + model, + y, + μ, + x, + fx, + cx, + isassigned(y) && isassigned(cx) ? y - μ * cx : similar(y), + zeros(T, nvar), + ) +end + +""" + update_cx!(nlp, x) + +Given an `AugLagModel`, if `x != nlp.x`, then updates the internal value `nlp.cx` calling `cons` +on `nlp.model`, and reset `nlp.fx` to a NaN. Also updates `nlp.μc_y`. +""" +function update_cx!(nlp::AugLagModel, x::AbstractVector{T}) where {T} + @assert length(x) == nlp.meta.nvar + if x != nlp.x + cons!(nlp.model, x, nlp.cx) + nlp.cx .-= nlp.model.meta.lcon + nlp.x .= x + refresh_μc_y!(nlp) + nlp.fx = T(NaN) + end +end + +""" + update_fxcx!(nlp, x) + +Given an `AugLagModel`, if `x != nlp.x`, then updates the internal value `nlp.cx` calling `objcons` +on `nlp.model`. Also updates `nlp.μc_y`. Returns fx only. +""" +function update_fxcx!(nlp::AugLagModel, x::AbstractVector) + @assert length(x) == nlp.meta.nvar + if x != nlp.x + nlp.fx, _ = objcons!(nlp.model, x, nlp.cx) + nlp.cx .-= nlp.model.meta.lcon + nlp.x .= x + refresh_μc_y!(nlp) + elseif isnan(nlp.fx) + nlp.fx = obj(nlp.model, x) + end +end + +function refresh_μc_y!(nlp::AugLagModel) + nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y +end + +""" + update_y!(nlp) + +Given an `AugLagModel`, update `nlp.y = -nlp.μc_y` and updates `nlp.μc_y` accordingly. +""" +function update_y!(nlp::AugLagModel) + nlp.y .= .-nlp.μc_y + refresh_μc_y!(nlp) +end + +""" + project_y!(nlp, ymin, ymax) + +Given an `AugLagModel`, project `nlp.y` into [ymin, ymax]and updates `nlp.μc_y` accordingly. +""" +function project_y!(nlp::AugLagModel, ymin::AbstractVector{T}, ymax::AbstractVector{T}) where {T <: Real} + nlp.y .= max.(ymin, min.(nlp.y, ymax)) + refresh_μc_y!(nlp) +end + +function project_y!(nlp::AugLagModel, ymin::T, ymax::T) where {T <: Real} + nlp.y .= max.(ymin, min.(nlp.y, ymax)) + refresh_μc_y!(nlp) +end + +""" + update_μ!(nlp, μ) + +Given an `AugLagModel`, updates `nlp.μ = μ` and `nlp.μc_y` accordingly. +""" +function update_μ!(nlp::AugLagModel, μ::AbstractFloat) + nlp.μ = μ + refresh_μc_y!(nlp) +end + +function NLPModels.obj(nlp::AugLagModel, x::AbstractVector) + @assert length(x) == nlp.meta.nvar + increment!(nlp, :neval_obj) + update_fxcx!(nlp, x) + return nlp.fx - dot(nlp.y, nlp.cx) + (nlp.μ / 2) * dot(nlp.cx, nlp.cx) +end + +function NLPModels.grad!(nlp::AugLagModel, x::AbstractVector, g::AbstractVector) + @assert length(x) == nlp.meta.nvar + @assert length(g) == nlp.meta.nvar + increment!(nlp, :neval_grad) + update_cx!(nlp, x) + grad!(nlp.model, x, g) + g .+= jtprod!(nlp.model, x, nlp.μc_y, nlp.Jtv) + return g +end + +function NLPModels.objgrad!(nlp::AugLagModel, x::AbstractVector, g::AbstractVector) + @assert length(x) == nlp.meta.nvar + @assert length(g) == nlp.meta.nvar + increment!(nlp, :neval_obj) + increment!(nlp, :neval_grad) + update_fxcx!(nlp, x) + f = nlp.fx - dot(nlp.y, nlp.cx) + (nlp.μ / 2) * dot(nlp.cx, nlp.cx) + grad!(nlp.model, x, g) + g .+= jtprod!(nlp.model, x, nlp.μc_y, nlp.Jtv) + return f, g +end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 22c1c814..c78c7780 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -21,4 +21,7 @@ include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") +include("AL_model.jl") +include("AL_alg.jl") + end # module RegularizedOptimization From 12b452ad4586ad0a3c121c4b4231d0e70caf8317 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Tue, 15 Oct 2024 15:14:41 +0200 Subject: [PATCH 02/11] TR and R2 as default subsolvers --- src/AL_alg.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/AL_alg.jl b/src/AL_alg.jl index f397c92b..ee5823ae 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -15,7 +15,7 @@ function AL( nlp::AbstractNLPModel, h::H, options::ROSolverOptions; - subsolver = has_bounds(nlp) ? TRDH : PG, + subsolver = has_bounds(nlp) ? TR : R2, kwargs..., ) where {H} if !(unconstrained(nlp) || bound_constrained(nlp)) @@ -27,6 +27,9 @@ function AL( return subsolver(nlp, h, options; kwargs...) end +# a uniform solver interface is missing +# TR(nlp, h, options; kwargs...) = TR(nlp, h, NormLinf(1.0), options; kwargs...) + function AL( ::Val{:ineq}, nlp::AbstractNLPModel, @@ -81,7 +84,7 @@ If adopted, the Hessian is accessed as an abstract operator and need not be the * `x0::AbstractVector`: a primal initial guess (default: `nlp.meta.x0`) * `y0::AbstractVector`: a dual initial guess (default: `nlp.meta.y0`) -* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`) +* `subsolver`: the procedure used to compute a step (e.g. `PG`, `R2`, `TR` or `TRDH`) * `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver. @@ -97,7 +100,7 @@ function AL( options::ROSolverOptions{T}; x0::AbstractVector{T} = nlp.meta.x0, y0::AbstractVector{T} = nlp.meta.y0, - subsolver = has_bounds(nlp) ? TRDH : R2, + subsolver = has_bounds(nlp) ? TR : R2, subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), subsolver_options::ROSolverOptions{T} = ROSolverOptions{T}(), init_penalty::Real = T(10), From 81975cc25edfc1e4242efc9028b501ea50feabf1 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Tue, 15 Oct 2024 15:15:09 +0200 Subject: [PATCH 03/11] bugfix for ineq dispatch --- src/AL_alg.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/AL_alg.jl b/src/AL_alg.jl index ee5823ae..8983068b 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -34,16 +34,16 @@ function AL( ::Val{:ineq}, nlp::AbstractNLPModel, h::H, - options::ROSolverOptions{R}; - x0::AbstractVector{R} = nlp.meta.x0, + options::ROSolverOptions{T}; + x0::AbstractVector{T} = nlp.meta.x0, kwargs..., -) where {H, R} +) where {H, T} if nlp.meta.ncon == 0 || equality_constrained(nlp) error("AL(::Val{:ineq}, ...) should only be called for problems with inequalities. Use AL(...)") end snlp = nlp isa AbstractNLSModel ? SlackNLSModel(nlp) : SlackModel(nlp) - if length(x) != snlp.meta.nvar - x0s = fill!(V(undef, snlp.meta.nvar), zero(eltype(V))) + if length(x0) != snlp.meta.nvar + x0s = zeros(T, snlp.meta.nvar) x0s[1:(nlp.meta.nvar)] .= x0 else x0s = x0 From 829ceabc2e134b41bc805754689c847a3012b556 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Wed, 13 Nov 2024 18:48:55 +0100 Subject: [PATCH 04/11] improve AL description comment --- src/AL_alg.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/AL_alg.jl b/src/AL_alg.jl index 8983068b..a6685304 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -63,12 +63,14 @@ An augmented Lagrangian method for the problem where f: ℝⁿ → ℝ, c: ℝⁿ → ℝᵐ and their derivatives are Lipschitz continuous and h: ℝⁿ → ℝ is lower semi-continuous, proper and prox-bounded. -At each iteration, a step s is computed as an approximate solution of +At each iteration, an iterate x is computed as an approximate solution of - min ½ ‖J(x) s + F(x)‖² + ½ σ ‖s‖² + ψ(s; x) + min L(x;y,μ) + h(x) subject to lvar ≤ x ≤ uvar -where F(x) and J(x) are the residual and its Jacobian at x, respectively, ψ(s; x) = h(x + s), -and σ > 0 is a regularization parameter. +where y is an estimate of the Lagrange multiplier vector for the constraints lcon ≤ c(x) ≤ ucon, +μ is the penalty parameter and L(⋅;y,μ) is the augmented Lagrangian function defined by + + L(x;y,μ) := f(x) - yᵀc(x) + ½ μ ‖c(x)‖². ### Arguments From 3d504cf39850c17a336212ecf9918d7fe3c3fcb3 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Sat, 16 Nov 2024 09:53:50 +0100 Subject: [PATCH 05/11] use Percival for AL model --- Project.toml | 2 + src/AL_alg.jl | 19 ++++ src/AL_model.jl | 179 --------------------------------- src/RegularizedOptimization.jl | 2 +- 4 files changed, 22 insertions(+), 180 deletions(-) delete mode 100644 src/AL_model.jl diff --git a/Project.toml b/Project.toml index d5ba1185..e509ce99 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" +Percival = "01435c0c-c90d-11e9-3788-63660f8fbccc" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" RegularizedProblems = "ea076b23-609f-44d2-bb12-a4ae45328278" @@ -20,6 +21,7 @@ TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" LinearOperators = "2.7" NLPModels = "0.19, 0.20" NLPModelsModifiers = "0.7" +Percival = "0.7.2" ProximalOperators = "0.15" RegularizedProblems = "0.1.1" ShiftedProximalOperators = "0.2" diff --git a/src/AL_alg.jl b/src/AL_alg.jl index a6685304..ad2a864c 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -249,4 +249,23 @@ function AL( set_solution!(stats, x) set_constraint_multipliers!(stats, alf.y) return stats +end + +""" + project_y!(nlp, ymin, ymax) + +Given an `AugLagModel`, project `nlp.y` into [ymin, ymax]and updates `nlp.μc_y` accordingly. +""" +function project_y!( + nlp::AugLagModel, + ymin::AbstractVector{T}, + ymax::AbstractVector{T}, +) where {T <: Real} + nlp.y .= max.(ymin, min.(nlp.y, ymax)) + nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y +end + +function project_y!(nlp::AugLagModel, ymin::T, ymax::T) where {T <: Real} + nlp.y .= max.(ymin, min.(nlp.y, ymax)) + nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y end \ No newline at end of file diff --git a/src/AL_model.jl b/src/AL_model.jl deleted file mode 100644 index f0a061e6..00000000 --- a/src/AL_model.jl +++ /dev/null @@ -1,179 +0,0 @@ -# based on Percival.jl -# https://github.com/JuliaSmoothOptimizers/Percival.jl/blob/main/src/AugLagModel.jl - -export AugLagModel -export update_cx!, update_y!, project_y!, update_μ! - -using NLPModels, NLPModelsModifiers, LinearAlgebra, LinearOperators -using NLPModels: increment!, @lencheck - -@doc raw""" - AugLagModel(model, y, μ, x, fx, cx) - -Given a model -```math -\min \ f(x) \quad s.t. \quad c(x) = 0, \quad l ≤ x ≤ u, -``` -this new model represents the subproblem of the augmented Lagrangian method -```math -\min \ f(x) - yᵀc(x) + \tfrac{1}{2} μ \|c(x)\|^2 \quad s.t. \quad l ≤ x ≤ u, -``` -where y is an estimates of the Lagrange multiplier vector and μ is the penalty parameter. - -In addition to keeping `meta` and `counters` as any NLPModel, an AugLagModel also stores -- `model`: The internal model defining ``f``, ``c`` and the bounds, -- `y`: The multipliers estimate, -- `μ`: The penalty parameter, -- `x`: Reference to the last point at which the function `c(x)` was computed, -- `fx`: Reference to `f(x)`, -- `cx`: Reference to `c(x)`, -- `μc_y`: storage for y - μ * cx, -- `Jtv`: storage for jtprod(nlp, x, v). - -Use the functions `update_cx!`, `update_y!` and `update_μ!` to update these values. -""" -mutable struct AugLagModel{M <: AbstractNLPModel, T <: AbstractFloat, V <: AbstractVector} <: - AbstractNLPModel{T, V} - meta::NLPModelMeta{T, V} - counters::Counters - model::M - y::V - μ::T - x::V - fx::T - cx::V - μc_y::V # y - μ * cx - Jtv::Vector{T} -end - -function AugLagModel(model::AbstractNLPModel{T, V}, y::V, μ::T, x::V, fx::T, cx::V) where {T, V} - nvar, ncon = model.meta.nvar, model.meta.ncon - @assert length(x) == nvar - @assert length(y) == ncon - @assert length(cx) == ncon - μ >= 0 || error("Penalty parameter μ should be nonnegative") - - meta = NLPModelMeta( - nvar, - x0 = model.meta.x0, - lvar = model.meta.lvar, - uvar = model.meta.uvar, - name = "AugLagModel-$(model.meta.name)", - ) - - return AugLagModel( - meta, - Counters(), - model, - y, - μ, - x, - fx, - cx, - isassigned(y) && isassigned(cx) ? y - μ * cx : similar(y), - zeros(T, nvar), - ) -end - -""" - update_cx!(nlp, x) - -Given an `AugLagModel`, if `x != nlp.x`, then updates the internal value `nlp.cx` calling `cons` -on `nlp.model`, and reset `nlp.fx` to a NaN. Also updates `nlp.μc_y`. -""" -function update_cx!(nlp::AugLagModel, x::AbstractVector{T}) where {T} - @assert length(x) == nlp.meta.nvar - if x != nlp.x - cons!(nlp.model, x, nlp.cx) - nlp.cx .-= nlp.model.meta.lcon - nlp.x .= x - refresh_μc_y!(nlp) - nlp.fx = T(NaN) - end -end - -""" - update_fxcx!(nlp, x) - -Given an `AugLagModel`, if `x != nlp.x`, then updates the internal value `nlp.cx` calling `objcons` -on `nlp.model`. Also updates `nlp.μc_y`. Returns fx only. -""" -function update_fxcx!(nlp::AugLagModel, x::AbstractVector) - @assert length(x) == nlp.meta.nvar - if x != nlp.x - nlp.fx, _ = objcons!(nlp.model, x, nlp.cx) - nlp.cx .-= nlp.model.meta.lcon - nlp.x .= x - refresh_μc_y!(nlp) - elseif isnan(nlp.fx) - nlp.fx = obj(nlp.model, x) - end -end - -function refresh_μc_y!(nlp::AugLagModel) - nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y -end - -""" - update_y!(nlp) - -Given an `AugLagModel`, update `nlp.y = -nlp.μc_y` and updates `nlp.μc_y` accordingly. -""" -function update_y!(nlp::AugLagModel) - nlp.y .= .-nlp.μc_y - refresh_μc_y!(nlp) -end - -""" - project_y!(nlp, ymin, ymax) - -Given an `AugLagModel`, project `nlp.y` into [ymin, ymax]and updates `nlp.μc_y` accordingly. -""" -function project_y!(nlp::AugLagModel, ymin::AbstractVector{T}, ymax::AbstractVector{T}) where {T <: Real} - nlp.y .= max.(ymin, min.(nlp.y, ymax)) - refresh_μc_y!(nlp) -end - -function project_y!(nlp::AugLagModel, ymin::T, ymax::T) where {T <: Real} - nlp.y .= max.(ymin, min.(nlp.y, ymax)) - refresh_μc_y!(nlp) -end - -""" - update_μ!(nlp, μ) - -Given an `AugLagModel`, updates `nlp.μ = μ` and `nlp.μc_y` accordingly. -""" -function update_μ!(nlp::AugLagModel, μ::AbstractFloat) - nlp.μ = μ - refresh_μc_y!(nlp) -end - -function NLPModels.obj(nlp::AugLagModel, x::AbstractVector) - @assert length(x) == nlp.meta.nvar - increment!(nlp, :neval_obj) - update_fxcx!(nlp, x) - return nlp.fx - dot(nlp.y, nlp.cx) + (nlp.μ / 2) * dot(nlp.cx, nlp.cx) -end - -function NLPModels.grad!(nlp::AugLagModel, x::AbstractVector, g::AbstractVector) - @assert length(x) == nlp.meta.nvar - @assert length(g) == nlp.meta.nvar - increment!(nlp, :neval_grad) - update_cx!(nlp, x) - grad!(nlp.model, x, g) - g .+= jtprod!(nlp.model, x, nlp.μc_y, nlp.Jtv) - return g -end - -function NLPModels.objgrad!(nlp::AugLagModel, x::AbstractVector, g::AbstractVector) - @assert length(x) == nlp.meta.nvar - @assert length(g) == nlp.meta.nvar - increment!(nlp, :neval_obj) - increment!(nlp, :neval_grad) - update_fxcx!(nlp, x) - f = nlp.fx - dot(nlp.y, nlp.cx) + (nlp.μ / 2) * dot(nlp.cx, nlp.cx) - grad!(nlp.model, x, g) - g .+= jtprod!(nlp.model, x, nlp.μc_y, nlp.Jtv) - return f, g -end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index c78c7780..908eb928 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -9,6 +9,7 @@ using ProximalOperators, TSVD # dependencies from us using LinearOperators, NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore +using Percival: AugLagModel, update_y!, update_μ! include("utils.jl") include("input_struct.jl") @@ -21,7 +22,6 @@ include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") -include("AL_model.jl") include("AL_alg.jl") end # module RegularizedOptimization From 4a25ee8878053fd009c05adf243efdc5e69d7fb7 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Sat, 16 Nov 2024 17:51:05 +0100 Subject: [PATCH 06/11] add optional dual_safeguard with default projection --- src/AL_alg.jl | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/AL_alg.jl b/src/AL_alg.jl index ad2a864c..ad6be028 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -111,7 +111,7 @@ function AL( init_subtol::Real = T(0.1), factor_primal_linear_improvement::Real = T(3 // 4), factor_decrease_subtol::Real = T(1 // 4), -) where {H, T <: Real} + dual_safeguard = project_y!, if !(nlp.meta.minimize) error("AL only works for minimization problems") end @@ -131,10 +131,6 @@ function AL( @assert factor_penalty_up > 1 @assert 0 < factor_primal_linear_improvement < 1 @assert 0 < factor_decrease_subtol < 1 - ymin = -1e20 - ymax = 1e20 - @assert ymin <= 0 - @assert ymax >= 0 verbose = options.verbose max_time = options.maxTime max_iter = options.maxIter @@ -174,7 +170,7 @@ function AL( iter += 1 # dual safeguard - project_y!(alf, ymin, ymax) + dual_safeguard(alf) # AL subproblem suboptions.ϵa = max(subtol, options.ϵa) @@ -252,20 +248,13 @@ function AL( end """ - project_y!(nlp, ymin, ymax) + project_y!(nlp) -Given an `AugLagModel`, project `nlp.y` into [ymin, ymax]and updates `nlp.μc_y` accordingly. +Given an `AugLagModel`, project `nlp.y` into [ymin, ymax] and updates `nlp.μc_y` accordingly. """ -function project_y!( - nlp::AugLagModel, - ymin::AbstractVector{T}, - ymax::AbstractVector{T}, -) where {T <: Real} - nlp.y .= max.(ymin, min.(nlp.y, ymax)) - nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y -end +project_y!(nlp::AugLagModel) = project_y!(nlp::AugLagModel, -1e20, 1e20) -function project_y!(nlp::AugLagModel, ymin::T, ymax::T) where {T <: Real} +function project_y!(nlp::AugLagModel, ymin::V, ymax::V) where {V} nlp.y .= max.(ymin, min.(nlp.y, ymax)) nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y end \ No newline at end of file From 456c9bc0a77f895137000a86a4658d0091b1a23e Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Sat, 16 Nov 2024 17:54:49 +0100 Subject: [PATCH 07/11] add R2 method, add max_eval, rename cviol, remove rtol --- examples/demo-cutest.jl | 4 ++-- src/AL_alg.jl | 40 ++++++++++++++++++++-------------------- src/R2_alg.jl | 10 ++++++++++ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl index 7cbd38d6..358201b4 100644 --- a/examples/demo-cutest.jl +++ b/examples/demo-cutest.jl @@ -9,8 +9,8 @@ nlp = CUTEstModel(problem_name) h = NormL1(1.0) -options = ROSolverOptions(ϵa = 1e-6, ϵr = 1e-6, verbose = 2) - +options = ROSolverOptions(ϵa = 1e-6, verbose = 1) stats = AL(nlp, h, options) +print(stats) finalize(nlp) \ No newline at end of file diff --git a/src/AL_alg.jl b/src/AL_alg.jl index ad6be028..701f1a62 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -100,18 +100,18 @@ function AL( nlp::AbstractNLPModel, h::H, options::ROSolverOptions{T}; - x0::AbstractVector{T} = nlp.meta.x0, - y0::AbstractVector{T} = nlp.meta.y0, + x0::V = nlp.meta.x0, + y0::V = nlp.meta.y0, subsolver = has_bounds(nlp) ? TR : R2, subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), - subsolver_options::ROSolverOptions{T} = ROSolverOptions{T}(), init_penalty::Real = T(10), factor_penalty_up::Real = T(2), - ctol::Real = options.ϵa > 0 ? options.ϵa : options.ϵr, + ctol::Real = options.ϵa, init_subtol::Real = T(0.1), factor_primal_linear_improvement::Real = T(3 // 4), factor_decrease_subtol::Real = T(1 // 4), dual_safeguard = project_y!, +) where {H, T <: Real, V} if !(nlp.meta.minimize) error("AL only works for minimization problems") end @@ -134,6 +134,7 @@ function AL( verbose = options.verbose max_time = options.maxTime max_iter = options.maxIter + max_eval = -1 # initialization @assert length(x0) == nlp.meta.nvar @@ -149,13 +150,11 @@ function AL( mu = init_penalty alf = AugLagModel(nlp, y, mu, x, fx, cx) - V = norm(alf.cx, Inf) - V_old = Inf + cviol = norm(alf.cx, Inf) + cviol_old = Inf iter = 0 subiters = 0 done = false - - suboptions = subsolver_options subtol = init_subtol if verbose > 0 @@ -163,7 +162,7 @@ function AL( [:iter, :subiter, :fx, :prim_res, :μ, :normy, :dual_tol, :inner_status], [Int, Int, Float64, Float64, Float64, Float64, Float64, Symbol], ) - @info log_row(Any[iter, subiters, fx, V, alf.μ, norm(y), subtol]) + @info log_row(Any[iter, subiters, fx, cviol, alf.μ, norm(y), subtol]) end while !done @@ -173,10 +172,9 @@ function AL( dual_safeguard(alf) # AL subproblem - suboptions.ϵa = max(subtol, options.ϵa) - suboptions.ϵr = max(subtol, options.ϵr) + subtol = max(subtol, options.ϵa) subout = with_logger(subsolver_logger) do - subsolver(alf, h, suboptions, x0 = x) + subsolver(alf, h, x = x, atol = subtol, rtol = zero(T)) end x .= subout.solution subiters += subout.iter @@ -190,21 +188,22 @@ function AL( set_constraint_multipliers!(stats, alf.y) # stationarity measure + # FIXME it seems that R2 returns no dual_feas in `dual_feas` + # but in `solver_specific.xi` if subout.dual_residual_reliable set_dual_residual!(stats, subout.dual_feas) end # primal violation - V = norm(alf.cx, Inf) - set_primal_residual!(stats, V) + cviol = norm(alf.cx, Inf) + set_primal_residual!(stats, cviol) # termination checks dual_ok = subout.status_reliable && subout.status == :first_order && - suboptions.ϵa <= options.ϵa && - suboptions.ϵr <= options.ϵr - primal_ok = V <= ctol + subtol <= options.ϵa + primal_ok = cviol <= ctol optimal = dual_ok && primal_ok set_iter!(stats, iter) @@ -221,6 +220,7 @@ function AL( unbounded = false, stalled = false, exception = false, + max_eval = max_eval, max_time = max_time, max_iter = max_iter, ), @@ -229,16 +229,16 @@ function AL( done = stats.status != :unknown if verbose > 0 && (mod(stats.iter, verbose) == 0 || done) - @info log_row(Any[iter, subiters, fx, V, alf.μ, norm(alf.y), subtol, subout.status]) + @info log_row(Any[iter, subiters, fx, cviol, alf.μ, norm(alf.y), subtol, subout.status]) end if !done - if V > max(ctol, factor_primal_linear_improvement * V_old) + if cviol > max(ctol, factor_primal_linear_improvement * cviol_old) #@info "decreasing mu" mu *= factor_penalty_up end update_μ!(alf, mu) - V_old = V + cviol_old = cviol subtol *= factor_decrease_subtol end end diff --git a/src/R2_alg.jl b/src/R2_alg.jl index 83556090..53860c9d 100644 --- a/src/R2_alg.jl +++ b/src/R2_alg.jl @@ -203,6 +203,16 @@ function R2( ) end +function R2( + nlp::AbstractNLPModel{R, V}, + h; + selected::AbstractVector{<:Integer} = 1:(nlp.meta.nvar), + kwargs..., +) where {R, V} + reg_nlp = RegularizedNLPModel(nlp, h, selected) + return R2(reg_nlp; kwargs...) +end + function R2( f::F, ∇f!::G, From 936d0f8d384fa16f1268b4284c2dbef9df38d459 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Sat, 16 Nov 2024 18:00:30 +0100 Subject: [PATCH 08/11] use kwargs for options, allow single input reg_nlp --- examples/demo-cutest.jl | 3 +- src/AL_alg.jl | 145 +++++++++++++++++++++++----------------- 2 files changed, 84 insertions(+), 64 deletions(-) diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl index 358201b4..f0383329 100644 --- a/examples/demo-cutest.jl +++ b/examples/demo-cutest.jl @@ -9,8 +9,7 @@ nlp = CUTEstModel(problem_name) h = NormL1(1.0) -options = ROSolverOptions(ϵa = 1e-6, verbose = 1) -stats = AL(nlp, h, options) +stats = AL(nlp, h, atol = 1e-6, verbose = 1) print(stats) finalize(nlp) \ No newline at end of file diff --git a/src/AL_alg.jl b/src/AL_alg.jl index 701f1a62..03e4e26c 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -1,71 +1,79 @@ export AL -function AL(nlp::AbstractNLPModel, h::H, options::ROSolverOptions; kwargs...) where {H} - if unconstrained(nlp) || bound_constrained(nlp) - return AL(Val(:unc), nlp, h, options; kwargs...) - elseif equality_constrained(nlp) - return AL(Val(:equ), nlp, h, options; kwargs...) +function AL(nlp::AbstractNLPModel, h; kwargs...) + kwargs_dict = Dict(kwargs...) + selected = pop!(kwargs_dict, :selected, 1:(nlp.meta.nvar)) + reg_nlp = RegularizedNLPModel(nlp, h, selected) + return AL(reg_nlp; kwargs...) +end + +function AL(reg_nlp::AbstractRegularizedNLPModel; kwargs...) + if unconstrained(reg_nlp.model) || bound_constrained(reg_nlp.model) + return AL(Val(:unc), reg_nlp; kwargs...) + elseif equality_constrained(reg_nlp.model) + return AL(Val(:equ), reg_nlp; kwargs...) else # has inequalities - return AL(Val(:ineq), nlp, h, options; kwargs...) + return AL(Val(:ineq), reg_nlp; kwargs...) end end function AL( ::Val{:unc}, - nlp::AbstractNLPModel, - h::H, - options::ROSolverOptions; - subsolver = has_bounds(nlp) ? TR : R2, + reg_nlp::AbstractRegularizedNLPModel; + subsolver = has_bounds(reg_nlp.model) ? TR : R2, kwargs..., -) where {H} - if !(unconstrained(nlp) || bound_constrained(nlp)) +) + if !(unconstrained(reg_nlp.model) || bound_constrained(reg_nlp.model)) error( "AL(::Val{:unc}, ...) should only be called for unconstrained or bound-constrained problems. Use AL(...)", ) end @warn "Problem does not have general explicit constraints; calling solver $(string(subsolver))" - return subsolver(nlp, h, options; kwargs...) + return subsolver(reg_nlp; kwargs...) end # a uniform solver interface is missing -# TR(nlp, h, options; kwargs...) = TR(nlp, h, NormLinf(1.0), options; kwargs...) +# TR(nlp, h; kwargs...) = TR(nlp, h, NormLinf(1.0); kwargs...) function AL( ::Val{:ineq}, - nlp::AbstractNLPModel, - h::H, - options::ROSolverOptions{T}; - x0::AbstractVector{T} = nlp.meta.x0, + reg_nlp::AbstractRegularizedNLPModel; + x0::V = reg_nlp.model.meta.x0, kwargs..., -) where {H, T} +) where {V} + nlp = reg_nlp.model if nlp.meta.ncon == 0 || equality_constrained(nlp) error("AL(::Val{:ineq}, ...) should only be called for problems with inequalities. Use AL(...)") end snlp = nlp isa AbstractNLSModel ? SlackNLSModel(nlp) : SlackModel(nlp) + reg_snlp = RegularizedNLPModel(snlp, reg_nlp.h, reg_nlp.selected) if length(x0) != snlp.meta.nvar - x0s = zeros(T, snlp.meta.nvar) - x0s[1:(nlp.meta.nvar)] .= x0 + x = fill!(V(undef, snlp.meta.nvar), zero(eltype(V))) + x[1:(nlp.meta.nvar)] .= x0 else - x0s = x0 + x = x0 end - output = AL(Val(:equ), snlp, h, options; x0 = x0s, kwargs...) + output = AL(Val(:equ), reg_snlp; x0 = x, kwargs...) output.solution = output.solution[1:(nlp.meta.nvar)] return output end """ - AL(nlp, h, options; kwargs...) + AL(reg_nlp; kwargs...) -An augmented Lagrangian method for the problem +An augmented Lagrangian method for constrained regularized optimization, namely problems in the form - min f(x) + h(x) subject to lvar ≤ x ≤ uvar, lcon ≤ c(x) ≤ ucon + minimize f(x) + h(x) + subject to lvar ≤ x ≤ uvar, + lcon ≤ c(x) ≤ ucon where f: ℝⁿ → ℝ, c: ℝⁿ → ℝᵐ and their derivatives are Lipschitz continuous and h: ℝⁿ → ℝ is lower semi-continuous, proper and prox-bounded. -At each iteration, an iterate x is computed as an approximate solution of +At each iteration, an iterate x is computed as an approximate solution of the subproblem - min L(x;y,μ) + h(x) subject to lvar ≤ x ≤ uvar + minimize L(x;y,μ) + h(x) + subject to lvar ≤ x ≤ uvar where y is an estimate of the Lagrange multiplier vector for the constraints lcon ≤ c(x) ≤ ucon, μ is the penalty parameter and L(⋅;y,μ) is the augmented Lagrangian function defined by @@ -74,44 +82,64 @@ where y is an estimate of the Lagrange multiplier vector for the constraints lco ### Arguments -* `nlp::AbstractNLPModel`: a smooth optimization problem -* `h`: a regularizer such as those defined in ProximalOperators -* `options::ROSolverOptions`: a structure containing algorithmic parameters +* `reg_nlp::AbstractRegularizedNLPModel`: a regularized optimization problem, see `RegularizedProblems.jl`, + consisting of `model` representing a smooth optimization problem, see `NLPModels.jl`, and a regularizer `h` such + as those defined in `ProximalOperators.jl`. -The objective and gradient of `nlp` will be accessed. -The Hessian of `nlp` may be accessed or not, depending on the subsolver adopted. +The objective and gradient of `model` will be accessed. +The Hessian of `model` may be accessed or not, depending on the subsolver adopted. If adopted, the Hessian is accessed as an abstract operator and need not be the exact Hessian. ### Keyword arguments -* `x0::AbstractVector`: a primal initial guess (default: `nlp.meta.x0`) -* `y0::AbstractVector`: a dual initial guess (default: `nlp.meta.y0`) -* `subsolver`: the procedure used to compute a step (e.g. `PG`, `R2`, `TR` or `TRDH`) -* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver -* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver. +* `x0::AbstractVector`: a primal initial guess (default: `reg_nlp.model.meta.x0`) +* `y0::AbstractVector`: a dual initial guess (default: `reg_nlp.model.meta.y0`) +- `atol::T = √eps(T)`: absolute optimality tolerance; +- `ctol::T = atol`: absolute feasibility tolerance; +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; +- `max_iter::Int = 10000`: maximum number of iterations; +- `max_time::Float64 = 30.0`: maximum time limit in seconds; +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); +* `subsolver::AbstractOptimizationSolver = has_bounds(nlp) ? TR : R2`: the procedure used to compute a step (e.g. `PG`, `R2`, `TR` or `TRDH`); +* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver; +- `init_penalty::T = T(10)`: initial penalty parameter; +- `factor_penalty_up::T = T(2)`: multiplicative factor to increase the penalty parameter; +- `factor_primal_linear_improvement::T = T(3/4)`: fraction to declare sufficient improvement of feasibility; +- `init_subtol::T = T(0.1)`: initial subproblem tolerance; +- `factor_decrease_subtol::T = T(1/4)`: multiplicative factor to decrease the subproblem tolerance; +- `dual_safeguard = (nlp::AugLagModel) -> nothing`: in-place function to modify, as needed, the dual estimate. ### Return values -* `stats::GenericExecutionStats`: solution and other info. +* `stats::GenericExecutionStats`: solution and other info, see `SolverCore.jl`. """ function AL( ::Val{:equ}, - nlp::AbstractNLPModel, - h::H, - options::ROSolverOptions{T}; - x0::V = nlp.meta.x0, - y0::V = nlp.meta.y0, - subsolver = has_bounds(nlp) ? TR : R2, + reg_nlp::AbstractRegularizedNLPModel{T, V}; + x0::V = reg_nlp.model.meta.x0, + y0::V = reg_nlp.model.meta.y0, + atol::T = √eps(T), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + subsolver = has_bounds(reg_nlp.model) ? TR : R2, subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), - init_penalty::Real = T(10), - factor_penalty_up::Real = T(2), - ctol::Real = options.ϵa, - init_subtol::Real = T(0.1), - factor_primal_linear_improvement::Real = T(3 // 4), - factor_decrease_subtol::Real = T(1 // 4), + init_penalty::T = T(10), + factor_penalty_up::T = T(2), + ctol::T = atol, + init_subtol::T = T(0.1), + factor_primal_linear_improvement::T = T(3 // 4), + factor_decrease_subtol::T = T(1 // 4), dual_safeguard = project_y!, -) where {H, T <: Real, V} +) where {T, V} + + # Retrieve workspace + nlp = reg_nlp.model + h = reg_nlp.h + selected = reg_nlp.selected + if !(nlp.meta.minimize) error("AL only works for minimization problems") end @@ -131,10 +159,6 @@ function AL( @assert factor_penalty_up > 1 @assert 0 < factor_primal_linear_improvement < 1 @assert 0 < factor_decrease_subtol < 1 - verbose = options.verbose - max_time = options.maxTime - max_iter = options.maxIter - max_eval = -1 # initialization @assert length(x0) == nlp.meta.nvar @@ -172,9 +196,9 @@ function AL( dual_safeguard(alf) # AL subproblem - subtol = max(subtol, options.ϵa) + subtol = max(subtol, atol) subout = with_logger(subsolver_logger) do - subsolver(alf, h, x = x, atol = subtol, rtol = zero(T)) + subsolver(alf, h, x = x, atol = subtol, rtol = zero(T), selected = selected) end x .= subout.solution subiters += subout.iter @@ -199,10 +223,7 @@ function AL( set_primal_residual!(stats, cviol) # termination checks - dual_ok = - subout.status_reliable && - subout.status == :first_order && - subtol <= options.ϵa + dual_ok = subout.status_reliable && subout.status == :first_order && subtol <= atol primal_ok = cviol <= ctol optimal = dual_ok && primal_ok From 691dfe4149af7f622c933ab866c94201d5772d48 Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 18 Nov 2024 10:15:20 +0100 Subject: [PATCH 09/11] add ALSolver and solve!, uniform kwargs --- examples/demo-cutest.jl | 17 ++++ src/AL_alg.jl | 195 ++++++++++++++++++++++++++++------------ 2 files changed, 153 insertions(+), 59 deletions(-) diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl index f0383329..683fe5a0 100644 --- a/examples/demo-cutest.jl +++ b/examples/demo-cutest.jl @@ -12,4 +12,21 @@ h = NormL1(1.0) stats = AL(nlp, h, atol = 1e-6, verbose = 1) print(stats) +using RegularizedProblems + +regnlp = RegularizedNLPModel(nlp, h) +stats = AL(regnlp, atol = 1e-6, verbose = 1) +print(stats) + +solver = ALSolver(regnlp) +stats = solve!(solver, regnlp, atol = 1e-6, verbose = 1) +print(stats) + +using SolverCore + +stats = GenericExecutionStats(nlp) +solver = ALSolver(regnlp) +stats = solve!(solver, regnlp, stats, atol = 1e-6, verbose = 1) +print(stats) + finalize(nlp) \ No newline at end of file diff --git a/src/AL_alg.jl b/src/AL_alg.jl index 03e4e26c..377c5549 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -1,4 +1,6 @@ -export AL +export AL, ALSolver, solve! + +import SolverCore.solve! function AL(nlp::AbstractNLPModel, h; kwargs...) kwargs_dict = Dict(kwargs...) @@ -32,13 +34,10 @@ function AL( return subsolver(reg_nlp; kwargs...) end -# a uniform solver interface is missing -# TR(nlp, h; kwargs...) = TR(nlp, h, NormLinf(1.0); kwargs...) - function AL( ::Val{:ineq}, reg_nlp::AbstractRegularizedNLPModel; - x0::V = reg_nlp.model.meta.x0, + x::V = reg_nlp.model.meta.x0, kwargs..., ) where {V} nlp = reg_nlp.model @@ -47,13 +46,13 @@ function AL( end snlp = nlp isa AbstractNLSModel ? SlackNLSModel(nlp) : SlackModel(nlp) reg_snlp = RegularizedNLPModel(snlp, reg_nlp.h, reg_nlp.selected) - if length(x0) != snlp.meta.nvar - x = fill!(V(undef, snlp.meta.nvar), zero(eltype(V))) - x[1:(nlp.meta.nvar)] .= x0 + if length(x) != snlp.meta.nvar + xs = fill!(V(undef, snlp.meta.nvar), zero(eltype(V))) + xs[1:(nlp.meta.nvar)] .= x else - x = x0 + xs = x end - output = AL(Val(:equ), reg_snlp; x0 = x, kwargs...) + output = AL(Val(:equ), reg_snlp; x = xs, kwargs...) output.solution = output.solution[1:(nlp.meta.nvar)] return output end @@ -80,9 +79,18 @@ where y is an estimate of the Lagrange multiplier vector for the constraints lco L(x;y,μ) := f(x) - yᵀc(x) + ½ μ ‖c(x)‖². -### Arguments +For advanced usage, first define a solver "ALSolver" to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = ALSolver(reg_nlp) + solve!(solver, reg_nlp) + + stats = GenericExecutionStats(reg_nlp.model) + solver = ALSolver(reg_nlp) + solve!(solver, reg_nlp, stats) -* `reg_nlp::AbstractRegularizedNLPModel`: a regularized optimization problem, see `RegularizedProblems.jl`, +# Arguments + +- `reg_nlp::AbstractRegularizedNLPModel`: a regularized optimization problem, see `RegularizedProblems.jl`, consisting of `model` representing a smooth optimization problem, see `NLPModels.jl`, and a regularizer `h` such as those defined in `ProximalOperators.jl`. @@ -90,18 +98,18 @@ The objective and gradient of `model` will be accessed. The Hessian of `model` may be accessed or not, depending on the subsolver adopted. If adopted, the Hessian is accessed as an abstract operator and need not be the exact Hessian. -### Keyword arguments +# Keyword arguments -* `x0::AbstractVector`: a primal initial guess (default: `reg_nlp.model.meta.x0`) -* `y0::AbstractVector`: a dual initial guess (default: `reg_nlp.model.meta.y0`) +- `x::AbstractVector`: a primal initial guess (default: `reg_nlp.model.meta.x0`) +- `y::AbstractVector`: a dual initial guess (default: `reg_nlp.model.meta.y0`) - `atol::T = √eps(T)`: absolute optimality tolerance; - `ctol::T = atol`: absolute feasibility tolerance; - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; - `max_iter::Int = 10000`: maximum number of iterations; - `max_time::Float64 = 30.0`: maximum time limit in seconds; - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); -* `subsolver::AbstractOptimizationSolver = has_bounds(nlp) ? TR : R2`: the procedure used to compute a step (e.g. `PG`, `R2`, `TR` or `TRDH`); -* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver; +- `subsolver::AbstractOptimizationSolver = has_bounds(nlp) ? TR : R2`: the procedure used to compute a step (e.g. `PG`, `R2`, `TR` or `TRDH`); +- `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver; - `init_penalty::T = T(10)`: initial penalty parameter; - `factor_penalty_up::T = T(2)`: multiplicative factor to increase the penalty parameter; - `factor_primal_linear_improvement::T = T(3/4)`: fraction to declare sufficient improvement of feasibility; @@ -109,23 +117,72 @@ If adopted, the Hessian is accessed as an abstract operator and need not be the - `factor_decrease_subtol::T = T(1/4)`: multiplicative factor to decrease the subproblem tolerance; - `dual_safeguard = (nlp::AugLagModel) -> nothing`: in-place function to modify, as needed, the dual estimate. -### Return values - -* `stats::GenericExecutionStats`: solution and other info, see `SolverCore.jl`. +# Output +- `stats::GenericExecutionStats`: solution and other info, see `SolverCore.jl`. """ -function AL( - ::Val{:equ}, - reg_nlp::AbstractRegularizedNLPModel{T, V}; - x0::V = reg_nlp.model.meta.x0, - y0::V = reg_nlp.model.meta.y0, +mutable struct ALSolver{T, V, M, ST} <: AbstractOptimizationSolver + x::V + cx::V + y::V + has_bnds::Bool + sub_model::AugLagModel{M, T, V} + sub_solver::ST + sub_stats::GenericExecutionStats{T, V, V, Any} +end + +function ALSolver(reg_nlp::AbstractRegularizedNLPModel{T, V}; kwargs...) where {T, V} + nlp = reg_nlp.model + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + x = V(undef, nvar) + cx = V(undef, ncon) + y = V(undef, ncon) + has_bnds = has_bounds(nlp) + sub_model = AugLagModel(nlp, V(undef, ncon), T(0), x, T(0), V(undef, ncon)) + sub_solver = R2Solver(reg_nlp; kwargs...) + sub_stats = GenericExecutionStats(sub_model) + M = typeof(nlp) + ST = typeof(sub_solver) + return ALSolver{T, V, M, ST}(x, cx, y, has_bnds, sub_model, sub_solver, sub_stats) +end + +@doc (@doc ALSolver) function AL(::Val{:equ}, reg_nlp::AbstractRegularizedNLPModel; kwargs...) + nlp = reg_nlp.model + if !(nlp.meta.minimize) + error("AL only works for minimization problems") + end + if nlp.meta.ncon == 0 || !equality_constrained(nlp) + error( + "AL(::Val{:equ}, ...) should only be called for equality-constrained problems with bounded variables. Use AL(...)", + ) + end + solver = ALSolver(reg_nlp) + solve!(solver, reg_nlp; kwargs...) +end + +function SolverCore.solve!( + solver::AbstractOptimizationSolver, + model::AbstractRegularizedNLPModel; + kwargs..., +) + stats = GenericExecutionStats(model.model) + solve!(solver, model, stats; kwargs...) +end + +function SolverCore.solve!( + solver::ALSolver{T, V}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + x::V = reg_nlp.model.meta.x0, + y::V = reg_nlp.model.meta.y0, atol::T = √eps(T), verbose::Int = 0, max_iter::Int = 10000, max_time::Float64 = 30.0, max_eval::Int = -1, - subsolver = has_bounds(reg_nlp.model) ? TR : R2, - subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), + subsolver_verbose::Int = 0, + subsolver_max_iter::Int = 100000, + subsolver_max_eval::Int = -1, init_penalty::T = T(10), factor_penalty_up::T = T(2), ctol::T = atol, @@ -140,6 +197,7 @@ function AL( h = reg_nlp.h selected = reg_nlp.selected + # Sanity checks if !(nlp.meta.minimize) error("AL only works for minimization problems") end @@ -148,68 +206,87 @@ function AL( "AL(::Val{:equ}, ...) should only be called for equality-constrained problems. Use AL(...)", ) end + @assert length(solver.x) == nlp.meta.nvar + @assert length(solver.y) == nlp.meta.ncon + #TODO check solver.has_bnds with has_bounds(nlp) for solver.sub_solver - stats = GenericExecutionStats(nlp) + reset!(stats) set_iter!(stats, 0) start_time = time() set_time!(stats, 0.0) - # parameters + # check parameter values @assert init_penalty > 0 @assert factor_penalty_up > 1 @assert 0 < factor_primal_linear_improvement < 1 @assert 0 < factor_decrease_subtol < 1 # initialization - @assert length(x0) == nlp.meta.nvar - @assert length(y0) == nlp.meta.ncon - x = similar(nlp.meta.x0) - y = similar(nlp.meta.y0) - x .= x0 - y .= y0 - set_solution!(stats, x) - set_constraint_multipliers!(stats, y) - - fx, cx = objcons(nlp, x) + solver.x .= max.(nlp.meta.lvar, min.(x, nlp.meta.uvar)) + solver.y .= y + set_solution!(stats, solver.x) + set_constraint_multipliers!(stats, solver.y) + subout = solver.sub_stats + + objx, _ = objcons!(nlp, solver.x, solver.cx) + objx += @views h(solver.x[selected]) + set_objective!(stats, objx) + mu = init_penalty - alf = AugLagModel(nlp, y, mu, x, fx, cx) + solver.sub_model.y .= solver.y + update_μ!(solver.sub_model, mu) - cviol = norm(alf.cx, Inf) + cviol = norm(solver.cx, Inf) cviol_old = Inf iter = 0 subiters = 0 done = false subtol = init_subtol + rem_eval = max_eval if verbose > 0 @info log_header( - [:iter, :subiter, :fx, :prim_res, :μ, :normy, :dual_tol, :inner_status], + [:iter, :sub_it, :obj, :cviol, :μ, :normy, :sub_tol, :sub_status], [Int, Int, Float64, Float64, Float64, Float64, Float64, Symbol], ) - @info log_row(Any[iter, subiters, fx, cviol, alf.μ, norm(y), subtol]) + @info log_row(Any[iter, subiters, objx, cviol, mu, norm(solver.y), subtol]) end while !done iter += 1 # dual safeguard - dual_safeguard(alf) + dual_safeguard(solver.sub_model) # AL subproblem + sub_reg_nlp = RegularizedNLPModel(solver.sub_model, h, selected) subtol = max(subtol, atol) - subout = with_logger(subsolver_logger) do - subsolver(alf, h, x = x, atol = subtol, rtol = zero(T), selected = selected) - end - x .= subout.solution + reset!(subout) + solve!( + solver.sub_solver, + sub_reg_nlp, + subout, + x = solver.x, + atol = subtol, + rtol = zero(T), + max_time = max_time - stats.elapsed_time, + max_eval = subsolver_max_eval < 0 ? rem_eval : min(subsolver_max_eval, rem_eval), + max_iter = subsolver_max_iter, + verbose = subsolver_verbose, + ) + solver.x .= subout.solution + solver.cx .= solver.sub_model.cx subiters += subout.iter # objective - fx = obj(nlp, x) - set_objective!(stats, fx) + objx = obj(nlp, solver.x) + objx += @views h(solver.x[selected]) + set_objective!(stats, objx) # dual estimate - update_y!(alf) - set_constraint_multipliers!(stats, alf.y) + update_y!(solver.sub_model) + solver.y .= solver.sub_model.y + set_constraint_multipliers!(stats, solver.y) # stationarity measure # FIXME it seems that R2 returns no dual_feas in `dual_feas` @@ -219,7 +296,7 @@ function AL( end # primal violation - cviol = norm(alf.cx, Inf) + cviol = norm(solver.cx, Inf) set_primal_residual!(stats, cviol) # termination checks @@ -250,22 +327,22 @@ function AL( done = stats.status != :unknown if verbose > 0 && (mod(stats.iter, verbose) == 0 || done) - @info log_row(Any[iter, subiters, fx, cviol, alf.μ, norm(alf.y), subtol, subout.status]) + @info log_row(Any[iter, subiters, objx, cviol, mu, norm(solver.y), subtol, subout.status]) end if !done if cviol > max(ctol, factor_primal_linear_improvement * cviol_old) - #@info "decreasing mu" mu *= factor_penalty_up end - update_μ!(alf, mu) + update_μ!(solver.sub_model, mu) cviol_old = cviol subtol *= factor_decrease_subtol + rem_eval = max_eval < 0 ? max_eval : max_eval - neval_obj(nlp) end end - set_solution!(stats, x) - set_constraint_multipliers!(stats, alf.y) - return stats + set_solution!(stats, solver.x) + set_constraint_multipliers!(stats, solver.y) + stats end """ From 2073cbbfb14289cab6338fea0d35c1532a86523a Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 18 Nov 2024 10:32:25 +0100 Subject: [PATCH 10/11] add callback --- examples/demo-cutest.jl | 7 +++++++ src/AL_alg.jl | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl index 683fe5a0..e19f9f82 100644 --- a/examples/demo-cutest.jl +++ b/examples/demo-cutest.jl @@ -29,4 +29,11 @@ solver = ALSolver(regnlp) stats = solve!(solver, regnlp, stats, atol = 1e-6, verbose = 1) print(stats) +callback = + (regnlp, solver, stats) -> begin + @info "iter $(stats.iter), obj $(stats.objective), status $(stats.status)" + end +stats = AL(nlp, h, atol = 1e-6, verbose = 1, callback = callback) +print(stats) + finalize(nlp) \ No newline at end of file diff --git a/src/AL_alg.jl b/src/AL_alg.jl index 377c5549..a1e0ffbf 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -120,6 +120,21 @@ If adopted, the Hessian is accessed as an abstract operator and need not be the # Output - `stats::GenericExecutionStats`: solution and other info, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `reg_nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.y`: current dual estimate; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention; + - `stats.elapsed_time`: elapsed time in seconds. """ mutable struct ALSolver{T, V, M, ST} <: AbstractOptimizationSolver x::V @@ -173,6 +188,7 @@ function SolverCore.solve!( solver::ALSolver{T, V}, reg_nlp::AbstractRegularizedNLPModel{T, V}, stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, x::V = reg_nlp.model.meta.x0, y::V = reg_nlp.model.meta.y0, atol::T = √eps(T), @@ -252,6 +268,8 @@ function SolverCore.solve!( @info log_row(Any[iter, subiters, objx, cviol, mu, norm(solver.y), subtol]) end + callback(reg_nlp, solver, stats) + while !done iter += 1 @@ -324,6 +342,8 @@ function SolverCore.solve!( ), ) + callback(reg_nlp, solver, stats) + done = stats.status != :unknown if verbose > 0 && (mod(stats.iter, verbose) == 0 || done) From 247480d80f4f0b64fc3070e1a0ba90b177d77ecc Mon Sep 17 00:00:00 2001 From: Alberto De Marchi Date: Mon, 18 Nov 2024 10:39:21 +0100 Subject: [PATCH 11/11] add solver_specific in stats --- examples/demo-cutest.jl | 7 +++++++ src/AL_alg.jl | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl index e19f9f82..db281b99 100644 --- a/examples/demo-cutest.jl +++ b/examples/demo-cutest.jl @@ -36,4 +36,11 @@ callback = stats = AL(nlp, h, atol = 1e-6, verbose = 1, callback = callback) print(stats) +callback = + (regnlp, solver, stats) -> begin + @info "iter $(stats.iter), f $(stats.solver_specific[:smooth_obj]), h $(stats.solver_specific[:nonsmooth_obj])" + end +stats = AL(nlp, h, atol = 1e-6, verbose = 1, callback = callback) +print(stats) + finalize(nlp) \ No newline at end of file diff --git a/src/AL_alg.jl b/src/AL_alg.jl index a1e0ffbf..a27d970f 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -134,7 +134,9 @@ Notably, you can access, and modify, the following: - `stats.iter`: current iteration counter; - `stats.objective`: current objective function value; - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention; - - `stats.elapsed_time`: elapsed time in seconds. + - `stats.elapsed_time`: elapsed time in seconds; + - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; + - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function. """ mutable struct ALSolver{T, V, M, ST} <: AbstractOptimizationSolver x::V @@ -244,9 +246,12 @@ function SolverCore.solve!( set_constraint_multipliers!(stats, solver.y) subout = solver.sub_stats - objx, _ = objcons!(nlp, solver.x, solver.cx) - objx += @views h(solver.x[selected]) + fx, _ = objcons!(nlp, solver.x, solver.cx) + hx = @views h(solver.x[selected]) + objx = fx + hx set_objective!(stats, objx) + set_solver_specific!(stats, :smooth_obj, fx) + set_solver_specific!(stats, :nonsmooth_obj, hx) mu = init_penalty solver.sub_model.y .= solver.y @@ -297,9 +302,12 @@ function SolverCore.solve!( subiters += subout.iter # objective - objx = obj(nlp, solver.x) - objx += @views h(solver.x[selected]) + fx = obj(nlp, solver.x) + hx = @views h(solver.x[selected]) + objx = fx + hx set_objective!(stats, objx) + set_solver_specific!(stats, :smooth_obj, fx) + set_solver_specific!(stats, :nonsmooth_obj, hx) # dual estimate update_y!(solver.sub_model)