From 23c82b1c96db6e635a3a6c468fc18fb40133e066 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Fri, 10 Feb 2023 20:44:04 -0500 Subject: [PATCH 01/12] TRDH solver and subsolver --- Project.toml | 2 + src/RegularizedOptimization.jl | 3 +- src/TRDH_alg.jl | 329 +++++++++++++++++++++++++++++++++ src/input_struct.jl | 6 +- src/utils.jl | 14 ++ test/runtests.jl | 2 +- test/test_bounds.jl | 2 +- 7 files changed, 354 insertions(+), 4 deletions(-) create mode 100644 src/TRDH_alg.jl diff --git a/Project.toml b/Project.toml index 02f46a80..9ea028e7 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" @@ -15,6 +16,7 @@ SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" [compat] +LinearOperators = "2.5.1" NLPModels = "0.19" NLPModelsModifiers = "0.6" ProximalOperators = "0.15" diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index d71d32d2..458a8cce 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -7,7 +7,7 @@ using LinearAlgebra, Logging, Printf using ProximalOperators, TSVD # dependencies from us -using NLPModels, NLPModelsModifiers, ShiftedProximalOperators, SolverCore +using LinearOperators, NLPModels, NLPModelsModifiers, ShiftedProximalOperators, SolverCore include("utils.jl") include("input_struct.jl") @@ -15,6 +15,7 @@ include("PG_alg.jl") include("Fista_alg.jl") include("splitting.jl") include("TR_alg.jl") +include("TRDH_alg.jl") include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl new file mode 100644 index 00000000..5e4d6629 --- /dev/null +++ b/src/TRDH_alg.jl @@ -0,0 +1,329 @@ +export TRDH + +""" + TRDH(nlp, h, χ, options; kwargs...) + +A trust-region method for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is +lower semi-continuous and proper. + +About each iterate xₖ, a step sₖ is computed as an approximate solution of + + min φ(s; xₖ) + ψ(s; xₖ) subject to ‖s‖ ≤ Δₖ + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm and Δₖ > 0 is the trust-region radius. +The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient +method or the quadratic regularization method. + +### Arguments + +* `nlp::AbstractNLPModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `χ`: a norm used to define the trust region in the form of a regularizer +* `options::ROSolverOptions`: a structure containing algorithmic parameters + +The objective, gradient and Hessian of `nlp` will be accessed. +The Hessian is accessed as an abstract operator and need not be the exact Hessian. + +### Keyword arguments + +* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) +* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) +* `subsolver`: the procedure used to compute a step (`PG` or `R2`) +* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) +* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). + +### Return values + +* `xk`: the final iterate +* `Fobj_hist`: an array with the history of values of the smooth objective +* `Hobj_hist`: an array with the history of values of the nonsmooth objective +* `Complex_hist`: an array with the history of number of inner iterations. +""" +function TRDH(nlp::AbstractNLPModel{R}, h, χ, options::ROSolverOptions{R}; kwargs...) where {R <: Real} + kwargs_dict = Dict(kwargs...) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + xk, k, outdict = TRDH( + x -> obj(nlp, x), + (g, x) -> grad!(nlp, x, g), + h, + options, + x0; + χ = χ, + l_bound = nlp.meta.lvar, + u_bound = nlp.meta.uvar, + kwargs..., + ) + ξ = outdict[:ξ] + stats = GenericExecutionStats(nlp) + set_status!(stats, outdict[:status]) + set_solution!(stats, xk) + set_objective!(stats, outdict[:fk] + outdict[:hk]) + set_residuals!(stats, zero(eltype(xk)), ξ ≥ 0 ? sqrt(ξ) : ξ) + set_iter!(stats, k) + set_time!(stats, outdict[:elapsed_time]) + set_solver_specific!(stats, :Fhist, outdict[:Fhist]) + set_solver_specific!(stats, :Hhist, outdict[:Hhist]) + set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) + set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) + return stats +end + +function TRDH( + f::F, + ∇f!::G, + h::H, + options::ROSolverOptions{R}, + x0::AbstractVector{R}; + χ::X = NormLinf(one(R)), + selected::AbstractVector{<:Integer} = 1:length(x0), + kwargs..., +) where {R <: Real, F, G, H, X} + start_time = time() + elapsed_time = 0.0 + ϵ = options.ϵa + ϵr = options.ϵr + Δk = options.Δk + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + η1 = options.η1 + η2 = options.η2 + γ = options.γ + α = options.α + β = options.β + spectral = options.spectral + psb = options.psb + hess_init_val = one(R) / options.ν + + local l_bound, u_bound + has_bnds = false + for (key, val) in kwargs + if key == :l_bound + l_bound = val + has_bnds = has_bnds || any(l_bound .!= R(-Inf)) + elseif key == :u_bound + u_bound = val + has_bnds = has_bnds || any(u_bound .!= R(Inf)) + end + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + l_bound_k = similar(xk) + u_bound_k = similar(xk) + if h isa ShiftedProximableFunction # case TRDH is used as a subsolver + is_subsolver = true + ψ = shifted(h, xk) + @assert !has_bnds + l_bound = copy(ψ.l) + u_bound = copy(ψ.u) + @. l_bound_k = max(xk - Δk, l_bound) + @. u_bound_k = min(xk + Δk, u_bound) + has_bnds = true + set_bounds!(ψ, l_bound_k, u_bound_k) + else + is_subsolver = false + if has_bnds + @. l_bound_k = max(-Δk, l_bound - xk) + @. u_bound_k = min(Δk, u_bound - xk) + ψ = shifted(h, xk, l_bound_k, u_bound_k, selected) + else + ψ = shifted(h, xk, Δk, χ) + end + end + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + @info @sprintf "%6s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TRDH" + #! format: off + end + + local ξ1 + k = 0 + + fk = f(xk) + ∇fk = similar(xk) + ∇f!(∇fk, xk) + ∇fk⁻ = copy(∇fk) + Dk = spectral ? SpectralGradient(hess_init_val, length(xk)) : + DiagonalQN(fill!(similar(xk), hess_init_val), psb) + νInv = (norm(Dk.d, Inf) + one(R) / (α * Δk)) + ν = one(R) / νInv + mν∇fk = -ν .* ∇fk + mdinv∇fk = .-∇fk ./ Dk.d + + optimal = false + tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime + + while !(optimal || tired) + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + + # model for prox-gradient step to update Δk if ||s|| is too small and ξ1 + φ1(d) = ∇fk' * d + mk1(d) = φ1(d) + ψ(d) + + prox!(s, ψ, mν∇fk, ν) + Complex_hist[k] += 1 + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() # ? + ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + if ξ1 ≥ 0 && k == 1 + ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative + end + + if sqrt(ξ1) < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end + + Δ_effective = min(β * χ(s), Δk) + # update radius + if has_bnds + if is_subsolver + @. l_bound_k = max(xk - Δ_effective, l_bound) + @. u_bound_k = min(xk + Δ_effective, u_bound) + else + @. l_bound_k = max(-Δ_effective, l_bound - xk) + @. u_bound_k = min(Δ_effective, u_bound - xk) + end + set_bounds!(ψ, l_bound_k, u_bound_k) + else + set_radius!(ψ, Δ_effective) + end + + # model with diagonal hessian + φ(d) = ∇fk' * d + (d' * (Dk.d .* d)) / 2 + mk(d) = φ(d) + ψ(d) + + iprox!(s, ψ, ∇fk, Dk) + Complex_hist[k] += 1 + + sNorm = χ(s) + xkn .= xk .+ s + fkn = f(xkn) + hkn = h(xkn[selected]) + hkn == -Inf && error("nonsmooth term is not proper") + + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + + if (ξ ≤ 0 || isnan(ξ)) + error("TRDH: failed to compute a step: ξ = $ξ") + end + + ρk = Δobj / ξ + + TR_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") + + if (verbose > 0) && (k % ptf == 0) + #! format: off + @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt(ξ1) sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat + #! format: on + end + + if has_bnds + if is_subsolver + @. l_bound_k = max(xk - Δ_effective, l_bound) + @. u_bound_k = min(xk + Δ_effective, u_bound) + else + @. l_bound_k = max(-Δ_effective, l_bound - xk) + @. u_bound_k = min(Δ_effective, u_bound - xk) + end + end + + if η2 ≤ ρk < Inf + Δk = max(Δk, γ * sNorm) + !has_bnds && set_radius!(ψ, Δk) + end + + if η1 ≤ ρk < Inf + xk .= xkn + has_bnds && set_bounds!(ψ, l_bound_k, u_bound_k) + #update functions + fk = fkn + hk = hkn + shift!(ψ, xk) + ∇f!(∇fk, xk) + push!(Dk, s, ∇fk - ∇fk⁻) # update QN operator + νInv = (norm(Dk.d, Inf) + one(R) / (α * Δk)) + ν = one(R) / νInv + ∇fk⁻ .= ∇fk + mν∇fk .= -ν .* ∇fk + mdinv∇fk .= .-∇fk ./ Dk.d + end + + if ρk < η1 || ρk == Inf + Δk = Δk / 2 + has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk) + end + tired = k ≥ maxIter || elapsed_time > maxTime + end + + if verbose > 0 + if k == 1 + @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk + elseif optimal + #! format: off + @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv + #! format: on + @info "TRDH: terminating with √ξ1 = $(sqrt(ξ1))" + end + end + + status = if optimal + :first_order + elseif elapsed_time > maxTime + :max_time + elseif tired + :max_iter + else + :exception + end + outdict = Dict( + :Fhist => Fobj_hist[1:k], + :Hhist => Hobj_hist[1:k], + :Chist => Complex_hist[1:k], + :NonSmooth => h, + :status => status, + :fk => fk, + :hk => hk, + :ξ => ξ1 ≥ 0 ? sqrt(ξ1) : ξ1, + :elapsed_time => elapsed_time, + ) + + return xk, k, outdict +end diff --git a/src/input_struct.jl b/src/input_struct.jl index 0aa2243c..a1bbd934 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -16,6 +16,8 @@ mutable struct ROSolverOptions{R} γ::R # trust region buffer θ::R # step length factor in relation to Hessian norm β::R # TR size as factor of first PG step + spectral::Bool # for TRDH: use spectral gradient update if true, otherwise DiagonalQN + psb::Bool # for TRDH with DiagonalQN (spectral = false): use PSB update if true, otherwise Andrei update function ROSolverOptions{R}(; ϵa::R = √eps(R), @@ -33,6 +35,8 @@ mutable struct ROSolverOptions{R} γ::R = R(3), θ::R = R(1e-3), β::R = 1 / eps(R), + spectral::Bool = false, + psb::Bool = false, ) where {R <: Real} @assert ϵa ≥ 0 @assert ϵr ≥ 0 @@ -48,7 +52,7 @@ mutable struct ROSolverOptions{R} @assert γ > 1 @assert θ > 0 @assert β ≥ 1 - return new{R}(ϵa, ϵr, neg_tol, Δk, verbose, maxIter, maxTime, σmin, η1, η2, α, ν, γ, θ, β) + return new{R}(ϵa, ϵr, neg_tol, Δk, verbose, maxIter, maxTime, σmin, η1, η2, α, ν, γ, θ, β, spectral, psb) end end diff --git a/src/utils.jl b/src/utils.jl index f2913a75..d09ba620 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -3,3 +3,17 @@ function LinearAlgebra.opnorm(B; kwargs...) _, s, _ = tsvd(B) return s[1] end + +ShiftedProximalOperators.iprox!( + y::AbstractVector, + ψ::ShiftedProximableFunction, + g::AbstractVector, + D::DiagonalQN, +) = iprox!(y, ψ, g, D.d) + +ShiftedProximalOperators.iprox!( + y::AbstractVector, + ψ::ShiftedProximableFunction, + g::AbstractVector, + D::SpectralGradient, +) = iprox!(y, ψ, g, fill!(similar(g), D.d[1])) diff --git a/test/runtests.jl b/test/runtests.jl index 1dc1db2f..c4629acc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimizatio const global compound = 1 const global nz = 10 * compound -const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10) +const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, spectral = true) const global bpdn, bpdn_nls, sol = bpdn_model(compound) const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 diff --git a/test/test_bounds.jl b/test/test_bounds.jl index 1998bd8a..5546de90 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -1,6 +1,6 @@ for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1")) - for solver_sym ∈ (:TR, :R2) + for solver_sym ∈ (:TR, :R2, :TRDH) solver_sym == :TR && mod_name == "exact" && continue solver_name = string(solver_sym) solver = eval(solver_sym) From f704bc244b7566cfa674b728df7095404f565853 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Tue, 14 Feb 2023 17:29:10 -0500 Subject: [PATCH 02/12] update examples --- examples/demo-bpdn-constr.jl | 8 +++++++- examples/demo-nnmf-constr.jl | 21 +++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/examples/demo-bpdn-constr.jl b/examples/demo-bpdn-constr.jl index c4ccb9d6..21559562 100644 --- a/examples/demo-bpdn-constr.jl +++ b/examples/demo-bpdn-constr.jl @@ -9,7 +9,7 @@ include("plot-utils-bpdn.jl") Random.seed!(1234) function demo_solver(f, nls, sol, h, χ, suffix = "l0-linf") - options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10) + options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, spectral = false, psb = true) @info " using TR to solve with" h χ reset!(f) @@ -22,6 +22,12 @@ function demo_solver(f, nls, sol, h, χ, suffix = "l0-linf") R2_out = R2(f, h, options, x0 = f.meta.x0) @info "R2 relative error" norm(R2_out.solution - sol) / norm(sol) plot_bpdn(R2_out, sol, "constr-r2-$(suffix)") + + @info " using TRDH to solve with" h χ + reset!(f) + TRDH_out = TRDH(f, h, χ, options, x0 = f.meta.x0) + @info "TRDH relative error" norm(TRDH_out.solution - sol) / norm(sol) + # plot_bpdn(TRDH_out, sol, "constr-trdh-$(suffix)") @info " using LMTR to solve with" h χ reset!(nls) diff --git a/examples/demo-nnmf-constr.jl b/examples/demo-nnmf-constr.jl index 334f936a..8d0d7b10 100644 --- a/examples/demo-nnmf-constr.jl +++ b/examples/demo-nnmf-constr.jl @@ -13,12 +13,29 @@ function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf") @info " using TR to solve with" h χ reset!(f) TR_out = TR(f, h, χ, options, selected = selected) - plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") + # plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") @info " using R2 to solve with" h reset!(f) R2_out = R2(f, h, options, selected = selected) - plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") + # plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") + + options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, maxIter = 500, spectral = true) + @info " using TR with R2 as subproblem to solve with" h χ + reset!(f) + TR_out = TR(f, h, χ, options, selected = selected) + # plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") + + subsolver_options = ROSolverOptions(spectral = false, psb = true) + @info " using TR with TRDH as subproblem to solve with" h χ + reset!(f) + TR2_out = TR(f, h, χ, options, selected = selected, subsolver = TRDH, subsolver_options = subsolver_options) + # plot_nnmf(TR2_out, Avec, m, n, k, "tr-trdh-$suffix") + + @info " using TRDH to solve with" h χ + reset!(f) + TRDH_out = TRDH(f, h, χ, options, selected = selected) + # plot_nnmf(TRDH_out, Avec, m, n, k, "trdh-$suffix") end function demo_nnmf() From ffc182abafc7a54ba679c40fee6b58322fa5121f Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Wed, 15 Feb 2023 15:13:45 -0500 Subject: [PATCH 03/12] fix issue TR without bounds and TRDH subsolver --- examples/demo-nnmf-constr.jl | 3 +-- src/TR_alg.jl | 15 +++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/demo-nnmf-constr.jl b/examples/demo-nnmf-constr.jl index 8d0d7b10..91f74071 100644 --- a/examples/demo-nnmf-constr.jl +++ b/examples/demo-nnmf-constr.jl @@ -9,7 +9,7 @@ include("plot-utils-nnmf.jl") Random.seed!(1234) function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf") - options = ROSolverOptions(ν = 1.0e-3, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, maxIter = 500) + options = ROSolverOptions(ν = 1.0e-3, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, maxIter = 500, spectral = true) @info " using TR to solve with" h χ reset!(f) TR_out = TR(f, h, χ, options, selected = selected) @@ -20,7 +20,6 @@ function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf") R2_out = R2(f, h, options, selected = selected) # plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") - options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, maxIter = 500, spectral = true) @info " using TR with R2 as subproblem to solve with" h χ reset!(f) TR_out = TR(f, h, χ, options, selected = selected) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 0a2e3f9a..2336351c 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -72,7 +72,7 @@ function TR( β = options.β local l_bound, u_bound - if has_bounds(f) + if has_bounds(f) || subsolver == TRDH l_bound = f.meta.lvar u_bound = f.meta.uvar end @@ -101,9 +101,8 @@ function TR( xkn = similar(xk) s = zero(xk) - ψ = - has_bounds(f) ? shifted(h, xk, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk), selected) : - shifted(h, xk, Δk, χ) + ψ = (has_bounds(f) || subsolver == TRDH) ? + shifted(h, xk, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk), selected) : shifted(h, xk, Δk, χ) Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) @@ -170,7 +169,7 @@ function TR( subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ, min(1e-2, sqrt(ξ1)) * ξ1) ∆_effective = min(β * χ(s), Δk) - has_bounds(f) ? + (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : set_radius!(ψ, ∆_effective) s, iter, _ = with_logger(subsolver_logger) do @@ -203,12 +202,12 @@ function TR( if η2 ≤ ρk < Inf Δk = max(Δk, γ * sNorm) - !has_bounds(f) && set_radius!(ψ, Δk) + !(has_bounds(f) || subsolver == TRDH) && set_radius!(ψ, Δk) end if η1 ≤ ρk < Inf xk .= xkn - has_bounds(f) && set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) + (has_bounds(f) || subsolver == TRDH) && set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) #update functions fk = fkn @@ -227,7 +226,7 @@ function TR( if ρk < η1 || ρk == Inf Δk = Δk / 2 - has_bounds(f) ? set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) : + (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) : set_radius!(ψ, Δk) end tired = k ≥ maxIter || elapsed_time > maxTime From 3f93ae60d2eb2f97e2fa052506df389eaeb7d161 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Thu, 16 Feb 2023 18:41:56 -0500 Subject: [PATCH 04/12] update Dk init, no reduceTR param --- src/TRDH_alg.jl | 10 +++++----- src/TR_alg.jl | 6 +++--- src/input_struct.jl | 6 ++++-- src/utils.jl | 3 +++ 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 5e4d6629..fbdff52c 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -81,6 +81,7 @@ function TRDH( x0::AbstractVector{R}; χ::X = NormLinf(one(R)), selected::AbstractVector{<:Integer} = 1:length(x0), + Bk = I, kwargs..., ) where {R <: Real, F, G, H, X} start_time = time() @@ -99,6 +100,7 @@ function TRDH( spectral = options.spectral psb = options.psb hess_init_val = one(R) / options.ν + reduce_TR = options.reduce_TR local l_bound, u_bound has_bnds = false @@ -176,11 +178,10 @@ function TRDH( ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) Dk = spectral ? SpectralGradient(hess_init_val, length(xk)) : - DiagonalQN(fill!(similar(xk), hess_init_val), psb) + ((Bk == I) ? DiagonalQN(fill!(similar(xk), hess_init_val), psb) : DiagonalQN(diag(Bk), psb)) νInv = (norm(Dk.d, Inf) + one(R) / (α * Δk)) ν = one(R) / νInv mν∇fk = -ν .* ∇fk - mdinv∇fk = .-∇fk ./ Dk.d optimal = false tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime @@ -196,7 +197,7 @@ function TRDH( mk1(d) = φ1(d) + ψ(d) prox!(s, ψ, mν∇fk, ν) - Complex_hist[k] += 1 + reduce_TR && (Complex_hist[k] += 1) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() # ? ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") @@ -210,7 +211,7 @@ function TRDH( continue end - Δ_effective = min(β * χ(s), Δk) + Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk # update radius if has_bnds if is_subsolver @@ -283,7 +284,6 @@ function TRDH( ν = one(R) / νInv ∇fk⁻ .= ∇fk mν∇fk .= -ν .* ∇fk - mdinv∇fk .= .-∇fk ./ Dk.d end if ρk < η1 || ρk == Inf diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 2336351c..9d1741a5 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -172,10 +172,10 @@ function TR( (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : set_radius!(ψ, ∆_effective) - s, iter, _ = with_logger(subsolver_logger) do - subsolver(φ, ∇φ!, ψ, subsolver_options, s) + s, iter, outdict = with_logger(subsolver_logger) do + subsolver(φ, ∇φ!, ψ, subsolver_options, s; Bk = Bk) end - Complex_hist[k] = iter + Complex_hist[k] = sum(outdict[:Chist]) sNorm = χ(s) xkn .= xk .+ s diff --git a/src/input_struct.jl b/src/input_struct.jl index a1bbd934..2ff0c799 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -17,7 +17,8 @@ mutable struct ROSolverOptions{R} θ::R # step length factor in relation to Hessian norm β::R # TR size as factor of first PG step spectral::Bool # for TRDH: use spectral gradient update if true, otherwise DiagonalQN - psb::Bool # for TRDH with DiagonalQN (spectral = false): use PSB update if true, otherwise Andrei update + psb::Bool # for TRDH with DiagonalQN (spectral = false): use PSB update if true, otherwise Andrei update + reduce_TR::Bool function ROSolverOptions{R}(; ϵa::R = √eps(R), @@ -37,6 +38,7 @@ mutable struct ROSolverOptions{R} β::R = 1 / eps(R), spectral::Bool = false, psb::Bool = false, + reduce_TR::Bool = true, ) where {R <: Real} @assert ϵa ≥ 0 @assert ϵr ≥ 0 @@ -52,7 +54,7 @@ mutable struct ROSolverOptions{R} @assert γ > 1 @assert θ > 0 @assert β ≥ 1 - return new{R}(ϵa, ϵr, neg_tol, Δk, verbose, maxIter, maxTime, σmin, η1, η2, α, ν, γ, θ, β, spectral, psb) + return new{R}(ϵa, ϵr, neg_tol, Δk, verbose, maxIter, maxTime, σmin, η1, η2, α, ν, γ, θ, β, spectral, psb, reduce_TR) end end diff --git a/src/utils.jl b/src/utils.jl index d09ba620..3d84154b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -17,3 +17,6 @@ ShiftedProximalOperators.iprox!( g::AbstractVector, D::SpectralGradient, ) = iprox!(y, ψ, g, fill!(similar(g), D.d[1])) + +LinearAlgebra.diag(op::DiagonalQN) = copy(op.d) +LinearAlgebra.diag(op::SpectralGradient{T}) where {T} = zeros(T, op.nrow) .* op.d[1] From 4bc748dd3cd0ea50f4b7357817f4bde9035d9dc4 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Fri, 17 Feb 2023 13:16:37 -0500 Subject: [PATCH 05/12] update delta --- src/TRDH_alg.jl | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index fbdff52c..6b500be5 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -73,6 +73,17 @@ function TRDH(nlp::AbstractNLPModel{R}, h, χ, options::ROSolverOptions{R}; kwar return stats end +# update l_bound_k and u_bound_k +function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ) + if is_subsolver + @. l_bound_k = max(xk - Δ, l_bound) + @. u_bound_k = min(xk + Δ, u_bound) + else + @. l_bound_k = max(-Δ, l_bound - xk) + @. u_bound_k = min(Δ, u_bound - xk) + end +end + function TRDH( f::F, ∇f!::G, @@ -179,7 +190,8 @@ function TRDH( ∇fk⁻ = copy(∇fk) Dk = spectral ? SpectralGradient(hess_init_val, length(xk)) : ((Bk == I) ? DiagonalQN(fill!(similar(xk), hess_init_val), psb) : DiagonalQN(diag(Bk), psb)) - νInv = (norm(Dk.d, Inf) + one(R) / (α * Δk)) + DkNorm = norm(Dk.d, Inf) + νInv = (DkNorm + one(R) / (α * Δk)) ν = one(R) / νInv mν∇fk = -ν .* ∇fk @@ -214,13 +226,7 @@ function TRDH( Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk # update radius if has_bnds - if is_subsolver - @. l_bound_k = max(xk - Δ_effective, l_bound) - @. u_bound_k = min(xk + Δ_effective, u_bound) - else - @. l_bound_k = max(-Δ_effective, l_bound - xk) - @. u_bound_k = min(Δ_effective, u_bound - xk) - end + update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ_effective) set_bounds!(ψ, l_bound_k, u_bound_k) else set_radius!(ψ, Δ_effective) @@ -256,23 +262,15 @@ function TRDH( #! format: on end - if has_bnds - if is_subsolver - @. l_bound_k = max(xk - Δ_effective, l_bound) - @. u_bound_k = min(xk + Δ_effective, u_bound) - else - @. l_bound_k = max(-Δ_effective, l_bound - xk) - @. u_bound_k = min(Δ_effective, u_bound - xk) - end - end - if η2 ≤ ρk < Inf Δk = max(Δk, γ * sNorm) + has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) !has_bnds && set_radius!(ψ, Δk) end if η1 ≤ ρk < Inf xk .= xkn + has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) has_bnds && set_bounds!(ψ, l_bound_k, u_bound_k) #update functions fk = fkn @@ -280,16 +278,20 @@ function TRDH( shift!(ψ, xk) ∇f!(∇fk, xk) push!(Dk, s, ∇fk - ∇fk⁻) # update QN operator - νInv = (norm(Dk.d, Inf) + one(R) / (α * Δk)) - ν = one(R) / νInv + DkNorm = norm(Dk.d, Inf) ∇fk⁻ .= ∇fk - mν∇fk .= -ν .* ∇fk end if ρk < η1 || ρk == Inf Δk = Δk / 2 + has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk) end + + νInv = (DkNorm + one(R) / (α * Δk)) + ν = one(R) / νInv + mν∇fk .= -ν .* ∇fk + tired = k ≥ maxIter || elapsed_time > maxTime end From ef3d23541a34ba7bbc984bab9667fe53712e79e0 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Fri, 17 Feb 2023 14:06:57 -0500 Subject: [PATCH 06/12] cv basd upon xi --- src/TRDH_alg.jl | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 6b500be5..7b3521d1 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -139,11 +139,11 @@ function TRDH( xk = copy(x0) hk = h(xk[selected]) if hk == Inf - verbose > 0 && @info "R2: finding initial guess where nonsmooth term is finite" + verbose > 0 && @info "TRDH: finding initial guess where nonsmooth term is finite" prox!(xk, h, x0, one(eltype(x0))) hk = h(xk[selected]) hk < Inf || error("prox computation must be erroneous") - verbose > 0 && @debug "R2: found point where h has value" hk + verbose > 0 && @debug "TRDH: found point where h has value" hk end hk == -Inf && error("nonsmooth term is not proper") @@ -182,6 +182,7 @@ function TRDH( end local ξ1 + local ξ k = 0 fk = f(xk) @@ -208,19 +209,21 @@ function TRDH( φ1(d) = ∇fk' * d mk1(d) = φ1(d) + ψ(d) - prox!(s, ψ, mν∇fk, ν) - reduce_TR && (Complex_hist[k] += 1) - ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() # ? - ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - - if ξ1 ≥ 0 && k == 1 - ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative - end - - if sqrt(ξ1) < ϵ - # the current xk is approximately first-order stationary - optimal = true - continue + if reduce_TR + prox!(s, ψ, mν∇fk, ν) + Complex_hist[k] += 1 + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + if ξ1 ≥ 0 && k == 1 + ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative + end + + if sqrt(ξ1) < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end end Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk @@ -252,6 +255,18 @@ function TRDH( error("TRDH: failed to compute a step: ξ = $ξ") end + if !reduce_TR + if ξ ≥ 0 && k == 1 + ϵ += ϵr * sqrt(ξ) # make stopping test absolute and relative + end + + if sqrt(ξ) < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end + end + ρk = Δobj / ξ TR_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") @@ -264,7 +279,6 @@ function TRDH( if η2 ≤ ρk < Inf Δk = max(Δk, γ * sNorm) - has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) !has_bnds && set_radius!(ψ, Δk) end @@ -306,6 +320,8 @@ function TRDH( end end + !reduce_TR && (ξ1 = ξ) # for output dict + status = if optimal :first_order elseif elapsed_time > maxTime From 272720c7f80e877297636bd793c0ff2050c53781 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Fri, 17 Feb 2023 15:09:11 -0500 Subject: [PATCH 07/12] fix verbose issues --- src/TRDH_alg.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 7b3521d1..8e2fbcec 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -177,7 +177,11 @@ function TRDH( Complex_hist = zeros(Int, maxIter) if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TRDH" + if reduce_TR + @info @sprintf "%6s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TRDH" + else + @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TRDH" + end #! format: off end @@ -273,7 +277,11 @@ function TRDH( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt(ξ1) sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat + if reduce_TR + @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt(ξ1) sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat + else + @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat + end #! format: on end From a9aba110fc3d67ee3c66b6d3359370b42771cdd8 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Fri, 17 Feb 2023 15:14:47 -0500 Subject: [PATCH 08/12] benchmark tables --- benchmarks/tables/bpdn-constr-table.jl | 26 ++++ benchmarks/tables/bpdn-table.jl | 28 +++++ benchmarks/tables/fh-table.jl | 30 +++++ benchmarks/tables/nnmf-table.jl | 26 ++++ benchmarks/tables/regulopt-tables.jl | 157 +++++++++++++++++++++++++ benchmarks/tables/svm-table.jl | 29 +++++ 6 files changed, 296 insertions(+) create mode 100644 benchmarks/tables/bpdn-constr-table.jl create mode 100644 benchmarks/tables/bpdn-table.jl create mode 100644 benchmarks/tables/fh-table.jl create mode 100644 benchmarks/tables/nnmf-table.jl create mode 100644 benchmarks/tables/regulopt-tables.jl create mode 100644 benchmarks/tables/svm-table.jl diff --git a/benchmarks/tables/bpdn-constr-table.jl b/benchmarks/tables/bpdn-constr-table.jl new file mode 100644 index 00000000..65e72a38 --- /dev/null +++ b/benchmarks/tables/bpdn-constr-table.jl @@ -0,0 +1,26 @@ +include("regulopt-tables.jl") + +# model +Random.seed!(1234) +compound = 1 +model, nls_model, sol = bpdn_model(compound, bounds = true) + +f = LSR1Model(model) +λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10 +h = NormL1(λ) + +verbose = 0 # 10 +options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=20) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=20) +options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=20) +options5 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) +options6 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=20, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options7, options2, options3, options4] # n'importe lequel si subsolver = :None + +benchmark_table(f, 1:f.meta.nvar, sol, h, λ, solvers, subsolvers, solver_options, subsolver_options, "BPDN-cstr") \ No newline at end of file diff --git a/benchmarks/tables/bpdn-table.jl b/benchmarks/tables/bpdn-table.jl new file mode 100644 index 00000000..644567aa --- /dev/null +++ b/benchmarks/tables/bpdn-table.jl @@ -0,0 +1,28 @@ +include("regulopt-tables.jl") + +# model +Random.seed!(1234) +compound = 1 +model, nls_model, sol = bpdn_model(compound, bounds = false) + +# parameters +f = LSR1Model(model) +λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10 +h = NormL1(λ) + +verbose = 0 # 10 +options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options5 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) +options6 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40, reduce_TR = false) +options8 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options7, options2, options3, options4, options8] # n'importe lequel si subsolver = :None + +benchmark_table(f, 1:f.meta.nvar, sol, h, λ, solvers, subsolvers, solver_options, subsolver_options, "BPDN") \ No newline at end of file diff --git a/benchmarks/tables/fh-table.jl b/benchmarks/tables/fh-table.jl new file mode 100644 index 00000000..34216108 --- /dev/null +++ b/benchmarks/tables/fh-table.jl @@ -0,0 +1,30 @@ +include("regulopt-tables.jl") +using ADNLPModels, DifferentialEquations + +Random.seed!(1234) +data, simulate, resid, misfit = RegularizedProblems.FH_smooth_term() +model = ADNLPModel(misfit, ones(5)) +f = LBFGSModel(model) + +λ = 1.0 +h = NormL0(λ) +ν = 1.0e2 +verbose = 0 #10 +maxIter_sub = 200 +options = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) +options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) +options5 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) +options8 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options8, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options2, options7, options2, options3, options4] +subset = 2:10 # issues with R2 alone + +benchmark_table(f, 1:f.meta.nvar, [], h, λ, solvers[subset], subsolvers[subset], solver_options[subset], subsolver_options[subset], + "FH with ν = $ν, λ = $λ") \ No newline at end of file diff --git a/benchmarks/tables/nnmf-table.jl b/benchmarks/tables/nnmf-table.jl new file mode 100644 index 00000000..9a08ec9d --- /dev/null +++ b/benchmarks/tables/nnmf-table.jl @@ -0,0 +1,26 @@ +include("regulopt-tables.jl") + +Random.seed!(1234) +m, n, k = 100, 50, 5 +model, A, selected = nnmf_model(m, n, k) +f = LSR1Model(model) +λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 1000 +h = NormL0(λ) +ν = 1.0 +verbose = 0 #10 +options = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options5 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options8 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options8, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options2, options7, options2, options3, options4] + +benchmark_table(f, selected, [], h, λ, solvers, subsolvers, solver_options, subsolver_options, + "NNMF with m = $m, n = $n, k = $k, ν = $ν, λ = $λ") \ No newline at end of file diff --git a/benchmarks/tables/regulopt-tables.jl b/benchmarks/tables/regulopt-tables.jl new file mode 100644 index 00000000..674698b9 --- /dev/null +++ b/benchmarks/tables/regulopt-tables.jl @@ -0,0 +1,157 @@ +using PrettyTables +using Random +using LinearAlgebra +using ProximalOperators +using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization, ShiftedProximalOperators, SolverBenchmark +using Printf + +# utils for extracting stats / display table +modelname(nlp::LSR1Model) = "LSR1" +modelname(nlp::LBFGSModel) = "LBFGS" +modelname(nlp::SpectralGradientModel) = "SpectralGradient" +modelname(nlp::DiagonalQNModel) = "DiagonalQN" +subsolvername(subsolver::Symbol) = subsolver == :None ? "" : string("-", subsolver) +function options_str(options::ROSolverOptions, solver::Symbol, subsolver_options::ROSolverOptions, subsolver::Symbol) + if solver == :TRDH + out_str = !options.spectral ? (options.psb ? "-DiagQN-PSB" : "-DiagQN-Andrei") : "-Spectral" + out_str = (options.reduce_TR) ? out_str : string(out_str, "-noredTR") + elseif solver == :TR && subsolver == :TRDH + out_str = !subsolver_options.spectral ? (subsolver_options.psb ? "-DiagQN-PSB" : "-DiagQN-Andrei") : "-Spectral" + out_str = (subsolver_options.reduce_TR) ? out_str : string(out_str, "-noredTR") + else + out_str = "" + end + return out_str +end +grad_evals(nlp::AbstractNLPModel) = neval_grad(nlp) +grad_evals(nls::AbstractNLSModel) = neval_jtprod_residual(nls) + neval_jprod_residual(nls) +function nb_prox_evals(stats, solver::Symbol) + if solver ∈ [:TR, :R2, :TRDH] + prox_evals = sum(stats.solver_specific[:SubsolverCounter]) + else + error("not implemented") + end + return prox_evals +end + +function benchmark_table( + f::AbstractNLPModel, + selected, + sol, + h, + λ, + solvers, + subsolvers, + solver_options, + subsolver_options, + pb_name::String, +) + + row_names = ["$(solver)$(subsolvername(subsolver))$(options_str(opt, solver, subsolver_opt, subsolver))" + for (solver, opt, subsolver, subsolver_opt) in zip(solvers, solver_options, subsolvers, subsolver_options)] + + n∇f_evals = [] + nprox_evals = [] + solver_stats = [] + + for (solver, subsolver, opt, sub_opt) in zip(solvers, subsolvers, solver_options, subsolver_options) + @info " using $solver with subsolver = $subsolver" + args = solver == :R2 ? () : (NormLinf(1.0),) + if subsolver == :None + solver_out = eval(solver)(f, h, args..., opt, x0 = f.meta.x0, selected = selected) + else + solver_out = eval(solver)(f, h, args..., opt, x0 = f.meta.x0, subsolver = eval(subsolver), + subsolver_options = sub_opt, selected = selected) + end + push!(n∇f_evals, grad_evals(f)) + push!(nprox_evals, nb_prox_evals(solver_out, solver)) + push!(solver_stats, solver_out) + reset!(f) + + end + + if length(sol) == 0 + header = [ + "f(x)", + "h(x)/λ", + "ξ", + "∇f evals", + "prox calls", + ] + else + header = [ + "f(x) (true = $(round(obj(model, sol); sigdigits = 4)))", + "h(x)/λ", + "ξ", + "||x-x*||/||x*||", + "∇f evals", + "prox calls", + ] + end + + n_solvers = length(row_names) + data = Matrix{Any}(undef, n_solvers, length(header)) + for i=1:n_solvers + solver_out = solver_stats[i] + x = solver_out.solution + fx = solver_out.solver_specific[:Fhist][end] + hx = solver_out.solver_specific[:Hhist][end] + ξ = solver_out.dual_feas + n∇f = n∇f_evals[i] + nprox = nprox_evals[i] + if length(sol) == 0 + data[i, :] .= [fx, hx / λ, ξ, n∇f, nprox] + else + err = norm(x - sol) / norm(sol) + data[i, :] .= [fx, hx / λ, ξ, err, n∇f, nprox] + end + end + + if length(sol) == 0 + print_formats = ft_printf(["%7.3e", "%7.1e", "%7.1e", "%i","%i"], 1:length(header)) + else + print_formats = ft_printf(["%7.3e", "%7.1e", "%7.1e", "%7.3e", "%i","%i"], 1:length(header)) + end + + return pretty_table(data; + header = header, + row_names= row_names, + title = "$pb_name $(modelname(f)) $(typeof(h).name.name)", + # backend = Val(:latex), + formatters = (print_formats, + # (v, i, j) -> (SolverBenchmark.safe_latex_AbstractFloat(v)), + ), + ) +end + + + +# λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100000 +# h = NormL1(λ) +# benchmark_table(f, selected, [], h, λ, solvers, subsolvers, solver_options, subsolver_options, +# "NNMF with m = $m, n = $n, k = $k, ν = 1.0e-3,") + + + + +# header = ["TR LSR1 L0Box", "R2 LSR1 L0Box", "LM L0Box", "LMTR L0Box"] +# TR_out = TR(f, h, χ, options, x0 = f.meta.x0) +# n∇f_TR = neval_grad(f) +# prox_evals_TR = sum(TR_out.solver_specific[:SubsolverCounter]) +# reset!(f) +# R2_out = R2(f, h, options, x0 = f.meta.x0) +# n∇f_R2 = neval_grad(f) +# prox_evals_R2 = R2_out.iter +# reset!(f) +# LM_out = LM(nls_model, h, options, x0 = nls_model.meta.x0) +# n∇f_LM = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model) +# prox_evals_LM = sum(LM_out.solver_specific[:SubsolverCounter]) +# reset!(nls_model) +# LMTR_out = LMTR(nls_model, h, χ, options, x0 = nls_model.meta.x0) +# n∇f_LMTR = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model) +# prox_evals_LMTR = sum(LMTR_out.solver_specific[:SubsolverCounter]) +# reset!(nls_model) +# n∇f_evals = [n∇f_TR, n∇f_R2, n∇f_LM, n∇f_LMTR] +# nprox_evals = [prox_evals_TR, prox_evals_R2, prox_evals_LM, prox_evals_LMTR] + +# solver_stats = [TR_out, R2_out, LM_out, LMTR_out] \ No newline at end of file diff --git a/benchmarks/tables/svm-table.jl b/benchmarks/tables/svm-table.jl new file mode 100644 index 00000000..da662dde --- /dev/null +++ b/benchmarks/tables/svm-table.jl @@ -0,0 +1,29 @@ +include("regulopt-tables.jl") +using MLDatasets + +Random.seed!(1234) +nlp_train, nls_train, sol_train = RegularizedProblems.svm_train_model() +nlp_test, nls_test, sol_test = RegularizedProblems.svm_test_model() +f = LSR1Model(nlp_train) +f_test = LSR1Model(nlp_test) +λ = 10.0 #norm(grad(model, rand(model.meta.nvar)), Inf) / 10 +h = NormL1(λ) + +ν = 1.0e0 +verbose = 0 #10 +options = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +options5 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options2, options7, options3, options4] +subset = 1:9 + +benchmark_table(f, 1:f.meta.nvar, [], h, λ, solvers[subset], subsolvers[subset], solver_options[subset], subsolver_options[subset], + "SVM with ν = $ν, λ = $λ") \ No newline at end of file From e33c8d03ce8ff5418c62a28ce44bef5e09a58167 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Wed, 1 Mar 2023 13:41:18 -0500 Subject: [PATCH 09/12] update Bk default value, benchmark params --- benchmarks/tables/bpdn-constr-table.jl | 17 ++++++++++------- benchmarks/tables/bpdn-table.jl | 19 +++++++++++-------- benchmarks/tables/fh-table.jl | 22 ++++++++++++---------- benchmarks/tables/nnmf-table.jl | 21 ++++++++++++--------- benchmarks/tables/svm-table.jl | 17 ++++++++++------- src/TRDH_alg.jl | 6 +++--- 6 files changed, 58 insertions(+), 44 deletions(-) diff --git a/benchmarks/tables/bpdn-constr-table.jl b/benchmarks/tables/bpdn-constr-table.jl index 65e72a38..419901c5 100644 --- a/benchmarks/tables/bpdn-constr-table.jl +++ b/benchmarks/tables/bpdn-constr-table.jl @@ -10,13 +10,16 @@ f = LSR1Model(model) h = NormL1(λ) verbose = 0 # 10 -options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=20) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=20) -options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=20) -options5 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) -options6 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) -options7 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=20, reduce_TR = false) +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 20 +options = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options6 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options5 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options7 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner, reduce_TR = false) solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] diff --git a/benchmarks/tables/bpdn-table.jl b/benchmarks/tables/bpdn-table.jl index 644567aa..9c896d0d 100644 --- a/benchmarks/tables/bpdn-table.jl +++ b/benchmarks/tables/bpdn-table.jl @@ -11,14 +11,17 @@ f = LSR1Model(model) h = NormL1(λ) verbose = 0 # 10 -options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options5 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) -options6 = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) -options7 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40, reduce_TR = false) -options8 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40, reduce_TR = false) +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 40 +options = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options6 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options5 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options7 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner, reduce_TR = false) +options8 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner, reduce_TR = false) solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] diff --git a/benchmarks/tables/fh-table.jl b/benchmarks/tables/fh-table.jl index 34216108..52ad6c18 100644 --- a/benchmarks/tables/fh-table.jl +++ b/benchmarks/tables/fh-table.jl @@ -10,15 +10,17 @@ f = LBFGSModel(model) h = NormL0(λ) ν = 1.0e2 verbose = 0 #10 -maxIter_sub = 200 -options = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) -options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) -options5 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) -options6 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) -options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = 1e-6, ϵr = 1e-6, maxIter = maxIter_sub) -options8 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false, reduce_TR = false) +maxIter = 1000 +maxIter_sub = 200 # max iter for subsolver +ϵ = 1.0e-6 +options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options5 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options8 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false, reduce_TR = false) solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] @@ -27,4 +29,4 @@ subsolver_options = [options2, options2, options2, options2, options2, options2, subset = 2:10 # issues with R2 alone benchmark_table(f, 1:f.meta.nvar, [], h, λ, solvers[subset], subsolvers[subset], solver_options[subset], subsolver_options[subset], - "FH with ν = $ν, λ = $λ") \ No newline at end of file + "FH with ν = $ν, λ = $λ") diff --git a/benchmarks/tables/nnmf-table.jl b/benchmarks/tables/nnmf-table.jl index 9a08ec9d..e114e752 100644 --- a/benchmarks/tables/nnmf-table.jl +++ b/benchmarks/tables/nnmf-table.jl @@ -4,18 +4,21 @@ Random.seed!(1234) m, n, k = 100, 50, 5 model, A, selected = nnmf_model(m, n, k) f = LSR1Model(model) -λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 1000 +λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100 h = NormL0(λ) ν = 1.0 +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 40 verbose = 0 #10 -options = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options5 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) -options6 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) -options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options8 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false, reduce_TR = false) +options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options5 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options8 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false, reduce_TR = false) solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] diff --git a/benchmarks/tables/svm-table.jl b/benchmarks/tables/svm-table.jl index da662dde..05be38eb 100644 --- a/benchmarks/tables/svm-table.jl +++ b/benchmarks/tables/svm-table.jl @@ -11,13 +11,16 @@ h = NormL1(λ) ν = 1.0e0 verbose = 0 #10 -options = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options4 = ROSolverOptions(spectral = true, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) -options5 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = true) -options6 = ROSolverOptions(ν = ν, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = verbose, maxIter = 500, spectral = false, psb = false) -options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = 1e-6, ϵr = 1e-6, maxIter=40) +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 40 +options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options5 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 8e2fbcec..d4f6ca43 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -92,7 +92,7 @@ function TRDH( x0::AbstractVector{R}; χ::X = NormLinf(one(R)), selected::AbstractVector{<:Integer} = 1:length(x0), - Bk = I, + Bk = (one(R) / options.ν) * I, kwargs..., ) where {R <: Real, F, G, H, X} start_time = time() @@ -110,7 +110,7 @@ function TRDH( β = options.β spectral = options.spectral psb = options.psb - hess_init_val = one(R) / options.ν + hess_init_val = (Bk isa UniformScaling) ? Bk.λ : (one(R) / options.ν) reduce_TR = options.reduce_TR local l_bound, u_bound @@ -194,7 +194,7 @@ function TRDH( ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) Dk = spectral ? SpectralGradient(hess_init_val, length(xk)) : - ((Bk == I) ? DiagonalQN(fill!(similar(xk), hess_init_val), psb) : DiagonalQN(diag(Bk), psb)) + ((Bk isa UniformScaling) ? DiagonalQN(fill!(similar(xk), hess_init_val), psb) : DiagonalQN(diag(Bk), psb)) DkNorm = norm(Dk.d, Inf) νInv = (DkNorm + one(R) / (α * Δk)) ν = one(R) / νInv From 234a23a28a3a9bcb8ab2a14f4865be2196ed6007 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Fri, 3 Mar 2023 10:46:54 -0500 Subject: [PATCH 10/12] change TR radius subproblem, update docs TRDH, update params tests --- Project.toml | 2 +- benchmarks/tables/fh-table.jl | 2 +- benchmarks/tables/nnmf-table.jl | 2 +- src/TRDH_alg.jl | 25 ++++++++++++++----------- src/TR_alg.jl | 1 + 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/Project.toml b/Project.toml index 9ea028e7..a6a04161 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" [compat] -LinearOperators = "2.5.1" +LinearOperators = "2.5.2" NLPModels = "0.19" NLPModelsModifiers = "0.6" ProximalOperators = "0.15" diff --git a/benchmarks/tables/fh-table.jl b/benchmarks/tables/fh-table.jl index 52ad6c18..dacbd84c 100644 --- a/benchmarks/tables/fh-table.jl +++ b/benchmarks/tables/fh-table.jl @@ -12,7 +12,7 @@ h = NormL0(λ) verbose = 0 #10 maxIter = 1000 maxIter_sub = 200 # max iter for subsolver -ϵ = 1.0e-6 +ϵ = 1.0e-4 options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) diff --git a/benchmarks/tables/nnmf-table.jl b/benchmarks/tables/nnmf-table.jl index e114e752..7f6b1a61 100644 --- a/benchmarks/tables/nnmf-table.jl +++ b/benchmarks/tables/nnmf-table.jl @@ -7,7 +7,7 @@ f = LSR1Model(model) λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100 h = NormL0(λ) ν = 1.0 -ϵ = 1.0e-6 +ϵ = 1.0e-5 maxIter = 500 maxIter_inner = 40 verbose = 0 #10 diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index d4f6ca43..b0486864 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -2,8 +2,9 @@ export TRDH """ TRDH(nlp, h, χ, options; kwargs...) + TRDH(f, ∇f!, h, options, x0) -A trust-region method for the problem +A trust-region method with diagonal Hessian approximation for the problem min f(x) + h(x) @@ -14,10 +15,9 @@ About each iterate xₖ, a step sₖ is computed as an approximate solution of min φ(s; xₖ) + ψ(s; xₖ) subject to ‖s‖ ≤ Δₖ -where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, -ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm and Δₖ > 0 is the trust-region radius. -The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient -method or the quadratic regularization method. +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Dₖ s is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation +and Δₖ > 0 is the trust-region radius. ### Arguments @@ -26,16 +26,19 @@ method or the quadratic regularization method. * `χ`: a norm used to define the trust region in the form of a regularizer * `options::ROSolverOptions`: a structure containing algorithmic parameters -The objective, gradient and Hessian of `nlp` will be accessed. -The Hessian is accessed as an abstract operator and need not be the exact Hessian. +The objective and gradient of `nlp` will be accessed. + +In the second form, instead of `nlp`, the user may pass in + +* `f` a function such that `f(x)` returns the value of f at x +* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` +* `x0::AbstractVector`: an initial guess. ### Keyword arguments * `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) -* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) -* `subsolver`: the procedure used to compute a step (`PG` or `R2`) -* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) -* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). +* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`) +* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`). ### Return values diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 9d1741a5..3841e69c 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -172,6 +172,7 @@ function TR( (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : set_radius!(ψ, ∆_effective) + subsolver_options.Δk = ∆_effective / 10 s, iter, outdict = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_options, s; Bk = Bk) end From c56814a2f2c816d0a56cb3b8bbf009a32e1629e2 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Tue, 7 Mar 2023 18:09:25 -0500 Subject: [PATCH 11/12] change bounds kwargs scanning --- src/TRDH_alg.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index b0486864..34e4f7a2 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -118,14 +118,14 @@ function TRDH( local l_bound, u_bound has_bnds = false - for (key, val) in kwargs - if key == :l_bound - l_bound = val - has_bnds = has_bnds || any(l_bound .!= R(-Inf)) - elseif key == :u_bound - u_bound = val - has_bnds = has_bnds || any(u_bound .!= R(Inf)) - end + kw_keys = keys(kwargs) + if :l_bound in kw_keys + l_bound = kwargs[:l_bound] + has_bnds = has_bnds || any(l_bound .!= R(-Inf)) + end + if :u_bound in kw_keys + u_bound = kwargs[:u_bound] + has_bnds = has_bnds || any(u_bound .!= R(Inf)) end if verbose == 0 From 13f2990943c49afde3121c46c1f2cb7e0d4991f6 Mon Sep 17 00:00:00 2001 From: Geoffroy Leconte Date: Wed, 8 Mar 2023 00:27:11 -0500 Subject: [PATCH 12/12] fix last verbose TRDH with reduceTR set to false --- src/TRDH_alg.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 34e4f7a2..126ae2f1 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -325,9 +325,14 @@ function TRDH( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv - #! format: on - @info "TRDH: terminating with √ξ1 = $(sqrt(ξ1))" + if reduce_TR + @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv + #! format: on + @info "TRDH: terminating with √ξ1 = $(sqrt(ξ1))" + else + @info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt(ξ) "" Δk χ(xk) χ(s) νInv + @info "TRDH: terminating with √ξ = $(sqrt(ξ))" + end end end