diff --git a/.cirrus.yml b/.cirrus.yml
deleted file mode 100644
index ad5fb1d9..00000000
--- a/.cirrus.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-task:
-  matrix:
-    - name: FreeBSD
-      freebsd_instance:
-        image_family: freebsd-14-2
-  env:
-    matrix:
-      - JULIA_VERSION: 1
-  install_script: |
-    URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh"
-    set -x
-    if [ "$(uname -s)" = "Linux" ] && command -v apt; then
-      apt update
-      apt install -y curl
-    fi
-    if command -v curl; then
-      sh -c "$(curl ${URL})"
-    elif command -v wget; then
-      sh -c "$(wget ${URL} -q -O-)"
-    elif command -v fetch; then
-      sh -c "$(fetch ${URL} -o -)"
-    fi
-  build_script:
-    - cirrusjl build
-  test_script:
-    - cirrusjl test
diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
index c0e82906..1f6fc01c 100644
--- a/.github/workflows/draft-pdf.yml
+++ b/.github/workflows/draft-pdf.yml
@@ -21,7 +21,7 @@ jobs:
           journal: joss
           # This should be the path to the paper within your repo.
           paper-path: paper/paper.md
-      - name: Upload
+      - name: Upload PDF artifact
        uses: actions/upload-artifact@v4
        with:
          name: paper
@@ -29,3 +29,24 @@
          # PDF. Note, this should be the same directory as the input
          # paper.md
          path: paper/paper.pdf
+      - name: Create release
+        if: github.event_name == 'push'
+        uses: rymndhng/release-on-push-action@master
+        id: release
+        with:
+          bump_version_scheme: patch
+          tag_prefix: v
+          release_body: ""
+          use_github_release_notes: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Upload PDF to release
+        if: github.event_name == 'push'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: paper/paper.pdf
+          asset_name: joss-draft.pdf
+          tag: ${{ steps.release.outputs.tag_name }}
+          overwrite: true
+          body: ""
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..c68fff95
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+paper/examples/Manifest.toml
+paper/jats/paper.jats
+paper/jats/jso-packages.pdf
+paper/paper.pdf
diff --git a/paper/examples/Bench-utils.jl b/paper/examples/Bench-utils.jl
new file mode 100644
index 00000000..74b29f7d
--- /dev/null
+++ b/paper/examples/Bench-utils.jl
@@ -0,0 +1,43 @@
+module BenchUtils
+
+using ProximalAlgorithms
+using ProximalCore
+using NLPModels
+
+export Counting, reset_counters!
+
+(f::AbstractNLPModel)(x) = obj(f, x)
+
+function ProximalAlgorithms.value_and_gradient(f::AbstractNLPModel, x)
+  return obj(f, x), grad(f, x)
+end
+
+"Counting wrapper for f or g (counts #obj, #∇f, #prox)."
+mutable struct Counting{T}
+  f::T
+  eval_count::Int
+  gradient_count::Int
+  prox_count::Int
+end
+Counting(f::T) where {T} = Counting{T}(f, 0, 0, 0)
+
+# f(x)
+(f::Counting)(x) = (f.eval_count += 1; f.f(x))
+
+# (f, ∇f)
+function ProximalAlgorithms.value_and_gradient(f::Counting, x)
+  f.eval_count += 1
+  f.gradient_count += 1
+  return ProximalAlgorithms.value_and_gradient(f.f, x)
+end
+
+# prox!(y, g, x, γ)
+function ProximalCore.prox!(y, g::Counting, x, γ)
+  g.prox_count += 1
+  return ProximalCore.prox!(y, g.f, x, γ)
+end
+
+"Reset the counters of a Counting wrapper."
+reset_counters!(c::Counting) = (c.eval_count = 0; c.gradient_count = 0; c.prox_count = 0; nothing) + +end # module diff --git a/paper/examples/Benchmark.jl b/paper/examples/Benchmark.jl new file mode 100644 index 00000000..590af9d8 --- /dev/null +++ b/paper/examples/Benchmark.jl @@ -0,0 +1,291 @@ + +############################# +# ======== IMPORTS ======== # +############################# +using Random, LinearAlgebra +using ProximalOperators, ProximalCore, ProximalAlgorithms +using ShiftedProximalOperators +using NLPModels, NLPModelsModifiers +using RegularizedOptimization, RegularizedProblems +using MLDatasets +using PrettyTables +using LaTeXStrings + + +# Local includes +include("comparison-config.jl") +using .ComparisonConfig: CFG, CFG2 + +include("Bench-utils.jl") +using .BenchUtils + +############################# +# ===== Helper utils ====== # +############################# + +# Generic config printer (works for both CFG / CFG2) +function print_config(cfg) + println("Configuration:") + for fld in fieldnames(typeof(cfg)) + val = getfield(cfg, fld) + println(rpad(" $(String(fld))", 16), " = ", val) + end +end + +# Common QN selector +function ensure_qn(model, which::Symbol) + which === :LBFGS && return LBFGSModel(model) + which === :LSR1 && return LSR1Model(model) + error("Unknown QN: $which (expected :LBFGS or :LSR1)") +end + +# Close a PrettyTables Markdown string by adding a bottom rule +function close_markdown_table(table_str::AbstractString) + lines = split(String(table_str), '\n') + isempty(lines) && return table_str + sep = length(lines) >= 2 ? lines[2] : repeat("-", 10) + push!(lines, sep) + return join(lines, '\n') +end + +############################# +# ======= SVM bench ======= # +############################# + +# Accuracy for SVM (as in original script) +acc = vec -> length(findall(x -> x < 1, vec)) / length(vec) * 100 + +# PANOC (SVM) with RootNormLhalf +function run_panoc_svm!(model, x0; λ = 1.0, maxit = 500, tol = 1e-3, verbose = false) + f = BenchUtils.Counting(model) + g = BenchUtils.Counting(RootNormLhalf(λ)) + algo = ProximalAlgorithms.PANOC(maxit = maxit, tol = tol, verbose = verbose) + t = @elapsed x̂, it = algo(x0 = x0, f = f, g = g) + return ( + name = "PANOC (SVM)", + status = "first_order", + time = t, + iters = it, + fevals = f.eval_count, + gevals = f.gradient_count, + proxcalls = g.prox_count, + solution = x̂, + final_obj = obj(model, x̂) + ) +end + +function run_tr_svm!(model, x0; λ = 1.0, qn = :LSR1, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;)) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, RootNormLhalf(λ)) + solver = TRSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "TR ($(String(qn)), SVM)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_r2n_svm!(model, x0; λ = 1.0, qn = :LBFGS, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;)) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, RootNormLhalf(λ)) + solver = R2NSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + t = @elapsed 
RegularizedOptimization.solve!(solver, reg_nlp, stats;
+    x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
+  return (
+    name = "R2N ($(String(qn)), SVM)",
+    status = string(stats.status),
+    time = t,
+    iters = get(stats.solver_specific, :outer_iter, missing),
+    fevals = neval_obj(qn_model),
+    gevals = neval_grad(qn_model),
+    proxcalls = get(stats.solver_specific, :prox_evals, missing),
+    solution = stats.solution,
+    final_obj = obj(model, stats.solution)
+  )
+end
+
+function bench_svm!(cfg = CFG)
+  Random.seed!(cfg.SEED)
+  model, nls_train, _ = RegularizedProblems.svm_train_model()
+  x0 = model.meta.x0
+
+  results = NamedTuple[]
+  (:PANOC in cfg.RUN_SOLVERS) && push!(results, run_panoc_svm!(model, x0; λ = cfg.LAMBDA_L0, maxit = cfg.MAXIT_PANOC, tol = cfg.TOL, verbose = cfg.VERBOSE_PANOC))
+  (:TR in cfg.RUN_SOLVERS) && push!(results, run_tr_svm!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_TR, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N))
+  (:R2N in cfg.RUN_SOLVERS) && push!(results, run_r2n_svm!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_R2N, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N))
+
+  # Print quick summary
+  println("\n=== SVM: solver comparison ===")
+  for m in results
+    println("\n→ ", m.name)
+    println(" status = ", m.status)
+    println(" time (s) = ", round(m.time, digits = 4))
+    m.iters !== missing && println(" outer iters = ", m.iters)
+    println(" # f eval = ", m.fevals)
+    println(" # ∇f eval = ", m.gevals)
+    m.proxcalls !== missing && println(" # prox calls = ", Int(m.proxcalls))
+    println(" final objective = ", round(obj(model, m.solution), digits = 4))
+    println(" accuracy (%) = ", round(acc(residual(nls_train, m.solution)), digits = 1))
+  end
+
+  println("\nSVM Config:"); print_config(cfg)
+
+  data_svm = [
+    (; name=m.name,
+       status=string(m.status),
+       time=round(m.time, digits=4),
+       fe=m.fevals,
+       ge=m.gevals,
+       prox = m.proxcalls === missing ?
missing : Int(m.proxcalls), + obj = round(obj(model, m.solution), digits=4)) + for m in results + ] + + return data_svm +end + +############################# +# ======= NNMF bench ====== # +############################# + + +function run_tr_nnmf!(model, x0; λ = 1.0, qn = :LSR1, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;), selected = nothing) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, NormL0(λ), selected) + solver = TRSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "TR ($(String(qn)), NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_r2n_nnmf!(model, x0; λ = 1.0, qn = :LBFGS, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;), σk = 1e5, selected = nothing) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, NormL0(λ), selected) + solver = R2NSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, + sub_kwargs = sub_kwargs) + return ( + name = "R2N ($(String(qn)), NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_LM_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, selected = nothing) + reg_nls = RegularizedNLSModel(nls_model, NormL0(λ), selected) + solver = LMSolver(reg_nls) + stats = RegularizedExecutionStats(reg_nls) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose) + return ( + name = "LM (NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_residual(nls_model), + gevals = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(nls_model, stats.solution) + ) +end + +function bench_nnmf!(cfg = CFG2; m = 100, n = 50, k = 5) + Random.seed!(cfg.SEED) + + model, nls_model, _, selected = nnmf_model(m, n, k) + + # build x0 on positive orthant as original + x0 = max.(rand(model.meta.nvar), 0.0) + + # heuristic lambda (copied logic) + cfg.LAMBDA_L0 = norm(grad(model, rand(model.meta.nvar)), Inf) / 200 + + results = NamedTuple[] + (:TR in cfg.RUN_SOLVERS) && push!(results, run_tr_nnmf!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_TR, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N, selected = selected)) + (:R2N in cfg.RUN_SOLVERS) && push!(results, run_r2n_nnmf!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_R2N, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N, selected = selected)) + (:LM in cfg.RUN_SOLVERS) && push!(results, 
run_LM_nnmf!(nls_model, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, selected = selected)) + + println("\n=== NNMF: solver comparison ===") + for m in results + println("\n→ ", m.name) + println(" status = ", m.status) + println(" time (s) = ", round(m.time, digits = 4)) + m.iters !== missing && println(" outer iters = ", m.iters) + println(" # f eval = ", m.fevals) + println(" # ∇f eval = ", m.gevals) + m.proxcalls !== missing && println(" # prox calls = ", Int(m.proxcalls)) + println(" final objective= ", round(obj(model, m.solution), digits = 4)) + end + + println("\nNNMF Config:"); print_config(cfg) + + data_nnmf = [ + (; name=m.name, + status=string(m.status), + time=round(m.time, digits=4), + fe=m.fevals, + ge=m.gevals, + prox = m.proxcalls === missing ? missing : Int(m.proxcalls), + obj = round(m.final_obj, digits=4)) + for m in results + ] + + return data_nnmf +end + +# ############################# +# # ========= Main ========== # +# ############################# + +function main() + data_svm = bench_svm!(CFG) + data_nnmf = bench_nnmf!(CFG2) + + # concat both datasets + all_data = vcat(data_svm, data_nnmf) + + table_str = pretty_table(String, all_data; + header = ["Method", "Status", L"$t$($s$)", L"$\#f$", L"$\#\nabla f$", L"$\#prox$", "Objective"], + backend = Val(:latex), + alignment = [:l, :c, :r, :r, :r, :r, :r], + ) + + open("Benchmark.tex", "w") do io + write(io, table_str) + end +end + diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex new file mode 100644 index 00000000..0f38c3ae --- /dev/null +++ b/paper/examples/Benchmark.tex @@ -0,0 +1,10 @@ +\begin{tabular}{lcrrrrr} + \hline + \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline + PANOC (SVM) & first\_order & 14.642 & 3713 & 3713 & 2269 & 188.924 \\ + TR (LSR1, SVM) & first\_order & 1.8405 & 347 & 291 & 4037 & 179.837 \\ + R2N (LSR1, SVM) & first\_order & 0.9269 & 185 & 101 & 27932 & 192.493 \\ + TR (LBFGS, NNMF) & first\_order & 0.0552 & 42 & 40 & 3160 & 976.06 \\ + R2N (LBFGS, NNMF) & first\_order & 0.3024 & 169 & 107 & 17789 & 411.727 \\ + LM (NNMF) & first\_order & 0.2622 & 15 & 27763 & 12320 & 131.183 \\\hline +\end{tabular} diff --git a/paper/examples/Project.toml b/paper/examples/Project.toml new file mode 100644 index 00000000..dfe38a1f --- /dev/null +++ b/paper/examples/Project.toml @@ -0,0 +1,10 @@ +[deps] +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" +NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" +PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" +ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b" +ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" +RegularizedProblems = "ea076b23-609f-44d2-bb12-a4ae45328278" +ShiftedProximalOperators = "d4fd37fa-580c-4e43-9b30-361c21aae263" diff --git a/paper/examples/comparison-config.jl b/paper/examples/comparison-config.jl new file mode 100644 index 00000000..7d36caac --- /dev/null +++ b/paper/examples/comparison-config.jl @@ -0,0 +1,22 @@ +module ComparisonConfig + +Base.@kwdef mutable struct Config + SEED::Int = 1234 + LAMBDA_L0::Float64 = 1.0 + TOL::Float64 = 1e-4 + RTOL::Float64 = 1e-4 + MAXIT_PANOC::Int = 10000 + VERBOSE_PANOC::Bool = false + VERBOSE_RO::Int = 0 + RUN_SOLVERS::Vector{Symbol} = [:PANOC, :TR, :R2N] # mutable + QN_FOR_TR::Symbol = :LSR1 + QN_FOR_R2N::Symbol 
= :LBFGS
+  SUB_KWARGS_R2N::NamedTuple = (; max_iter = 200)
+  PRINT_TABLE::Bool = true
+end
+
+# One global, constant *binding* to a mutable object = type stable & editable
+const CFG = Config(QN_FOR_R2N=:LSR1)
+const CFG2 = Config(RUN_SOLVERS = [:LM, :TR, :R2N], QN_FOR_TR = :LBFGS)
+
+end # module
\ No newline at end of file
diff --git a/paper/examples/example1.jl b/paper/examples/example1.jl
new file mode 100644
index 00000000..27db1ae5
--- /dev/null
+++ b/paper/examples/example1.jl
@@ -0,0 +1,13 @@
+using LinearAlgebra, Random, ProximalOperators, ShiftedProximalOperators
+using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization
+using MLDatasets
+
+Random.seed!(1234)
+model, nls_model, _ = RegularizedProblems.svm_train_model() # Build SVM model
+f = LSR1Model(model) # L-SR1 Hessian approximation
+λ = 1.0 # Regularization parameter
+h = RootNormLhalf(λ) # Nonsmooth term
+reg_nlp = RegularizedNLPModel(f, h) # Regularized problem
+solver = R2NSolver(reg_nlp) # Choose solver
+stats = RegularizedExecutionStats(reg_nlp)
+solve!(solver, reg_nlp, stats; atol=1e-4, rtol=1e-4, verbose=0, sub_kwargs=(max_iter=200,))
diff --git a/paper/jso-packages.pdf b/paper/jso-packages.pdf
new file mode 100644
index 00000000..77683eb6
Binary files /dev/null and b/paper/jso-packages.pdf differ
diff --git a/paper/paper.bib b/paper/paper.bib
index e69de29b..37da40fd 100644
--- a/paper/paper.bib
+++ b/paper/paper.bib
@@ -0,0 +1,161 @@
+@String{siopt = {SIAM Journal on Optimization}}
+@String{sisc = {SIAM Journal on Scientific Computing}}
+@String{coap = {Computational Optimization and Applications}}
+@String{gerad = {GERAD}}
+@String{gerad-address = {Montr\'eal, QC, Canada}}
+
+@Article{ aravkin-baraldi-orban-2022,
+  Author = {A. Y. Aravkin and R. Baraldi and D. Orban},
+  Title = {A Proximal Quasi-{N}ewton Trust-Region Method for Nonsmooth Regularized Optimization},
+  Journal = siopt,
+  Year = 2022,
+  Volume = 32,
+  Number = 2,
+  Pages = {900--929},
+  doi = {10.1137/21M1409536},
+}
+
+@Article{ aravkin-baraldi-orban-2024,
+  Author = {A. Y. Aravkin and R. Baraldi and D. Orban},
+  Title = {A {L}evenberg--{M}arquardt Method for Nonsmooth Regularized Least Squares},
+  Journal = sisc,
+  Year = 2024,
+  Volume = 46,
+  Number = 4,
+  Pages = {A2557--A2581},
+  doi = {10.1137/22M1538971},
+}
+
+@Software{ leconte_linearoperators_jl_linear_operators_2023,
+  Author = {Leconte, Geoffroy and Orban, Dominique and Soares Siqueira, Abel and contributors},
+  license = {MPL-2.0},
+  Title = {{LinearOperators.jl: Linear Operators for Julia}},
+  url = {https://github.com/JuliaSmoothOptimizers/LinearOperators.jl},
+  version = {2.6.0},
+  Year = 2023,
+}
+
+@Article{ leconte-orban-2023,
+  Author = {G. Leconte and D. Orban},
+  Title = {The Indefinite Proximal Gradient Method},
+  Journal = coap,
+  Year = 2025,
+  Volume = 91,
+  Number = 2,
+  Pages = {861--903},
+  doi = {10.1007/s10589-024-00604-5},
+}
+
+@TechReport{ leconte-orban-2023-2,
+  Author = {G. Leconte and D.
Orban}, + Title = {Complexity of trust-region methods with unbounded {H}essian approximations for smooth and nonsmooth optimization}, + Institution = gerad, + Year = 2023, + Type = {Cahier}, + Number = {G-2023-65}, + Address = gerad-address, + url = {https://www.gerad.ca/fr/papers/G-2023-65}, +} + +@TechReport{ diouane-habiboullah-orban-2024, + Author = {Youssef Diouane and Mohamed Laghdaf Habiboullah and Dominique Orban}, + Title = {A proximal modified quasi-Newton method for nonsmooth regularized optimization}, + Institution = {GERAD}, + Year = 2024, + Type = {Cahier}, + Number = {G-2024-64}, + Address = {Montr\'eal, Canada}, + doi = {10.48550/arxiv.2409.19428}, + url = {https://www.gerad.ca/fr/papers/G-2024-64}, +} + +@TechReport{ diouane-gollier-orban-2024, + Author = {Youssef Diouane and Maxence Gollier and Dominique Orban}, + Title = {A nonsmooth exact penalty method for equality-constrained optimization: complexity and implementation}, + Institution = {GERAD}, + Year = 2024, + Type = {Cahier}, + Number = {G-2024-65}, + Address = {Montr\'eal, Canada}, + doi = {10.13140/RG.2.2.16095.47527}, +} + +@Misc{orban-siqueira-cutest-2020, + author = {D. Orban and A. S. Siqueira and {contributors}}, + title = {{CUTEst.jl}: {J}ulia's {CUTEst} interface}, + month = {October}, + url = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl}, + year = {2020}, + DOI = {10.5281/zenodo.1188851}, +} + +@Misc{orban-siqueira-nlpmodels-2020, + author = {D. Orban and A. S. Siqueira and {contributors}}, + title = {{NLPModels.jl}: Data Structures for Optimization Models}, + month = {July}, + url = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl}, + year = {2020}, + DOI = {10.5281/zenodo.2558627}, +} + +@Misc{jso, + author = {T. Migot and D. Orban and A. S. Siqueira}, + title = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization}, + year = {2021}, + url = {https://juliasmoothoptimizers.github.io/}, + doi = {10.5281/zenodo.2655082}, +} + +@Misc{migot-orban-siqueira-optimizationproblems-2023, + author = {T. Migot and D. Orban and A. S. Siqueira}, + title = {OptimizationProblems.jl: A collection of optimization problems in Julia}, + year = {2023}, + doi = {10.5281/zenodo.3672094}, + url = {https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl}, +} + +@techreport{kim-park-2008, + title = {Sparse Nonnegative Matrix Factorization for Clustering}, + author = {Jingu Kim and Haesun Park}, + institution = {Georgia Inst. of Technology}, + number = {GT-CSE-08-01}, + year = {2008}, + url = {http://hdl.handle.net/1853/20058}, +} + +@InProceedings{ stella-themelis-sopasakis-patrinos-2017, + Author = {L. {Stella} and A. {Themelis} and P. {Sopasakis} and P. 
{Patrinos}},
+  Title = {A simple and efficient algorithm for nonlinear model predictive control},
+  Booktitle = {2017 IEEE 56th Annual Conference on Decision and Control (CDC)},
+  Year = 2017,
+  Pages = {1939--1944},
+  doi = {10.1109/CDC.2017.8263933},
+}
+
+@article{demarchi-jia-kanzow-mehlitz-2023,
+  author = {De~Marchi, Alberto and Jia, Xiaoxi and Kanzow, Christian and Mehlitz, Patrick},
+  title = {Constrained composite optimization and augmented {L}agrangian methods},
+  journal = {Mathematical Programming},
+  year = {2023},
+  month = {9},
+  volume = {201},
+  number = {1},
+  pages = {863--896},
+  doi = {10.1007/s10107-022-01922-4},
+}
+
+@Article{ themelis-stella-patrinos-2017,
+  Author = {Themelis, Andreas and Stella, Lorenzo and Patrinos, Panagiotis},
+  Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone Linesearch Algorithms},
+  Journal = siopt,
+  Year = 2018,
+  Volume = 28,
+  Number = 3,
+  Pages = {2274--2303},
+  doi = {10.1137/16M1080240},
+}
+
+@article{eckstein1992douglas,
+  title = {On the {D}ouglas--{R}achford splitting method and the proximal point algorithm for maximal monotone operators},
+  author = {Eckstein, Jonathan and Bertsekas, Dimitri P.},
+  journal = {Mathematical Programming},
+  volume = {55},
+  number = {1},
+  pages = {293--318},
+  year = {1992},
+  publisher = {Springer}
+}
diff --git a/paper/paper.md b/paper/paper.md
index 03c2d9a0..e695e53b 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -1,5 +1,5 @@
 ---
-title: 'RegularizedOptimization.jl: A Julia framework for regularization-based nonlinear optimization'
+title: 'RegularizedOptimization.jl: A Julia framework for regularized and nonsmooth optimization'
 tags:
   - Julia
   - nonsmooth optimization
@@ -30,4 +30,178 @@ header-includes: |
   \setmonofont[Path = ./, Scale=0.68]{JuliaMono-Regular.ttf}
 ---
-# References
\ No newline at end of file
+# Summary
+
+[RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) is a Julia package that implements a family of quadratic regularization and trust-region algorithms for solving nonsmooth optimization problems of the form
+\begin{equation}\label{eq:nlp}
+  \underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x) \quad \text{subject to} \quad c(x) = 0,
+\end{equation}
+where $f: \mathbb{R}^n \to \mathbb{R}$ is continuously differentiable, $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous and acts as a regularizer, such as a sparsity-inducing penalty, the indicator of bound constraints, or a combination of both, and $c: \mathbb{R}^n \to \mathbb{R}^m$ is continuously differentiable and defines equality constraints.
+All of $f$, $h$, and $c$ may be nonconvex.
+The library provides a modular and extensible framework for experimenting with nonsmooth and nonconvex optimization algorithms, including:
+
+- **Trust-region solvers (TR, TRDH)** [@aravkin-baraldi-orban-2022;@leconte-orban-2023],
+- **Quadratic regularization solvers (R2, R2N)** [@diouane-habiboullah-orban-2024;@aravkin-baraldi-orban-2022],
+- **Levenberg-Marquardt solvers (LM, LMTR)** [@aravkin-baraldi-orban-2024],
+- **Augmented Lagrangian solver (AL)** [@demarchi-jia-kanzow-mehlitz-2023].
+
+Except for the **AL** solver, these methods rely on the gradient, and optionally on Hessian(-vector) information, of the smooth part $f$, together with the proximal mapping of the nonsmooth part $h$, to compute steps.
+The objective $f + h$ is then used only to accept or reject trial points.
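+
+To make this division of labor concrete, the following minimal sketch (plain Julia, not the package's implementation) performs one quadratic regularization iteration; `f`, `∇f`, `h`, and `prox_h` are hypothetical user-supplied callbacks for the quantities described above:
+
+```julia
+# One schematic quadratic regularization iteration: the step is computed from
+# ∇f and prox_{νh} alone; f + h is evaluated only to accept or reject it.
+function qr_iteration(f, ∇f, h, prox_h, x, ν)
+    x_trial = prox_h(x .- ν .* ∇f(x), ν)              # step: gradient + prox
+    accept = f(x_trial) + h(x_trial) < f(x) + h(x)    # accept/reject on f + h
+    return accept ? (x_trial, 2ν) : (x, ν / 2)        # adapt the step parameter
+end
+```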
+
+Solvers in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) allow inexact resolution of the trust-region and quadratic regularization subproblems using first-order methods implemented in the package itself, such as the quadratic regularization methods R2 [@aravkin-baraldi-orban-2022] and R2DH [@diouane-habiboullah-orban-2024], and the trust-region variant TRDH [@leconte-orban-2023-2].
+
+All solvers in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) are implemented in an in-place fashion, minimizing memory allocations during the solution process.
+Moreover, they implement non-monotone strategies to accept trial points, which can enhance algorithmic performance in practice [@leconte-orban-2023;@diouane-habiboullah-orban-2024].
+
+## Requirements of ShiftedProximalOperators.jl
+
+The nonsmooth part $h$ must have a computable proximal mapping, defined as
+$$\text{prox}_{\nu h}(v) = \underset{x \in \mathbb{R}^n}{\arg\min} \frac{1}{2} \|x - v\|^2 + \nu h(x).$$
+
+While [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl) provides many standard proximal mappings, [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) also supplies **shifted** variants of these mappings, which are not supported by [ProximalOperators.jl](https://www.github.com/JuliaFirstOrder/ProximalOperators.jl).
+
+# Statement of need
+
+## Model-based framework for nonsmooth methods
+
+In Julia, \eqref{eq:nlp} can be solved using [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements in-place, first-order, line-search–based methods [@stella-themelis-sopasakis-patrinos-2017;@themelis-stella-patrinos-2017].
+Most of these methods are splitting schemes that either alternate between the proximal operators of $f$ and $h$, as in the **Douglas–Rachford** solver [@eckstein1992douglas], or take a step along a direction $d$ that depends on the gradient of $f$, possibly modified by an L-BFGS quasi-Newton approximation, followed by a proximal step on the nonsmooth part $h$. In some cases, such as with the **PANOC** solver [@stella-themelis-sopasakis-patrinos-2017], this process is augmented with a line-search mechanism along $d$.
+
+By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based approaches such as trust-region and quadratic regularization algorithms.
+As shown in [@aravkin-baraldi-orban-2022], model-based methods typically require fewer evaluations of the objective and its gradient than first-order line-search methods, at the expense of more proximal iterations to solve the subproblems.
+Each proximal computation, however, is inexpensive for numerous commonly used nonsmooth functions, such as separable penalties and bound constraints (see the examples below), making the overall approach efficient for large-scale problems.
+
+The package provides a consistent API to formulate optimization problems and apply different solvers.
+It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking.
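+
+Before detailing how $f$ and $h$ are modeled, note that individual proximal computations are indeed often cheap, as claimed above. For instance, for $h = \lambda\|\cdot\|_0$, used in the NNMF example below, $\text{prox}_{\nu h}$ is componentwise hard-thresholding; a self-contained sketch (illustrative only, not the ShiftedProximalOperators.jl API) is:
+
+```julia
+# Closed-form prox of h(x) = λ‖x‖₀: keep an entry if its magnitude exceeds
+# sqrt(2νλ), zero it otherwise (a single O(n) pass over the vector).
+prox_l0(v, ν, λ) = map(vi -> abs(vi) > sqrt(2 * ν * λ) ? vi : zero(vi), v)
+
+prox_l0([2.0, -0.5, 1.1], 0.5, 1.0)   # → [2.0, 0.0, 1.1]
+```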
+
+On the one hand, the smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems.
+Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023].
+Another option is to use [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl), which provides problem instances commonly used in the nonsmooth optimization literature, where $f$ can be paired with various nonsmooth terms $h$.
+
+On the other hand, nonsmooth terms $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl), which provides a broad collection of nonsmooth functions, together with [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl), which supplies the corresponding shifted proximal mappings.
+Specifically, the package supports shifted proximal operators of the form
+$$
+  \underset{t \in \mathbb{R}^n}{\arg\min} \, \tfrac{1}{2} \|t - q\|_2^2 + \nu \psi(t + s; x) + \chi(s + t \mid \Delta\mathbb{B}),
+$$
+where $\psi(\cdot; x)$ is a nonsmooth function that models $h$ (in general, we set $\psi(t; x) = h(x + t)$), $q$ is given, $x$ and $s$ are fixed shifts, $h$ is the nonsmooth term with respect to which we compute the proximal operator, and $\chi(\cdot \mid \Delta\mathbb{B})$ is the indicator of a ball of radius $\Delta > 0$ defined by a given norm.
+
+These shifted operators allow us to (i) incorporate bound or trust-region constraints via the indicator term, as required by the **TR** and **TRDH** algorithms, and (ii) evaluate the prox **in place**, without additional allocations, which integrates efficiently with our subproblem solvers.
+
+## Support for Hessians and Hessian approximations of the smooth part $f$
+
+In contrast to [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) methods such as **R2N** and **TR** support exact Hessians as well as several Hessian approximations of $f$.
+
+Hessian–vector products $v \mapsto Hv$ can be obtained via automatic differentiation through [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl) or supplied manually.
+
+Hessian approximations (e.g., quasi-Newton and diagonal schemes) can be selected via [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) [@leconte_linearoperators_jl_linear_operators_2023].
+
+This design allows algorithms to exploit second-order information **without** explicitly forming dense or sparse Hessian matrices, which is often expensive in time and memory, particularly at large scale.
+
+## Requirements of RegularizedProblems.jl
+
+With $f$ and $h$ modeled as discussed above, the package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a straightforward way to pair them into a *Regularized Nonlinear Programming Model*
+
+```julia
+reg_nlp = RegularizedNLPModel(f, h)
+```
+
+They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(\cdot)
= \tfrac{1}{2} \|F(\cdot)\|^2$ for some residual function $F: \mathbb{R}^n \to \mathbb{R}^m$, as required by the **LM** and **LMTR** solvers.
+
+```julia
+reg_nls = RegularizedNLSModel(f, h)
+```
+
+This design makes for a convenient source of reproducible problem instances for testing and benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
+
+## Testing and documentation
+
+The package includes a comprehensive suite of unit tests that covers all functionalities, ensuring reliability and correctness.
+Extensive documentation is provided, including a user guide, API reference, and examples to help users get started quickly.
+Documentation is built using Documenter.jl.
+
+## Application studies
+
+The package is used to solve equality-constrained optimization problems by means of the exact penalty approach [@diouane-gollier-orban-2024], where the model of the nonsmooth part differs from the function $h$ itself.
+This use case is not covered by the current version of [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl).
+
+# Examples
+
+We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on two nonsmooth and nonconvex problems:
+
+- **Support Vector Machine (SVM) with $\ell_{1/2}^{1/2}$ penalty** for image classification [@aravkin-baraldi-orban-2024].
+- **Nonnegative Matrix Factorization (NNMF) with $\ell_0$ penalty and bound constraints** [@kim-park-2008].
+
+Below is a condensed example showing how to define and solve the SVM problem:
+
+```julia
+using LinearAlgebra, Random, ProximalOperators, ShiftedProximalOperators
+using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization
+using MLDatasets
+
+Random.seed!(1234)
+model, nls_model, _ = RegularizedProblems.svm_train_model() # Build SVM model
+f = LSR1Model(model) # L-SR1 Hessian approximation
+λ = 1.0 # Regularization parameter
+h = RootNormLhalf(λ) # Nonsmooth term
+reg_nlp = RegularizedNLPModel(f, h) # Regularized problem
+solver = R2NSolver(reg_nlp) # Choose solver
+stats = RegularizedExecutionStats(reg_nlp)
+solve!(solver, reg_nlp, stats; atol=1e-4, rtol=1e-4, verbose=0, sub_kwargs=(max_iter=200,))
+```
+
+The NNMF problem can be set up in a similar fashion:
+
+```julia
+Random.seed!(1234)
+m, n, k = 100, 50, 5
+model, nls_model, _, selected = nnmf_model(m, n, k) # Build NNMF model
+x0 = rand(model.meta.nvar) # Initial point
+λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 200 # Regularization parameter
+h = NormL0(λ) # Nonsmooth term
+reg_nls = RegularizedNLSModel(nls_model, h, selected) # Regularized problem for LM
+solver = LMSolver(reg_nls) # Choose solver
+```
+
+## Numerical results
+
+We compare **PANOC** [@stella-themelis-sopasakis-patrinos-2017] (from [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl)) against **TR**, **R2N**, and **LM** from our library.
+In order to do so, we implemented a wrapper for **PANOC** to make it compatible with our problem definition.
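+
+The wrapper is lightweight: as in `paper/examples/Bench-utils.jl`, it suffices to let ProximalAlgorithms.jl evaluate an NLPModels.jl model as a callable and query value–gradient pairs:
+
+```julia
+using ProximalAlgorithms, NLPModels
+
+# Treat an NLPModels.jl model as the smooth term f of ProximalAlgorithms.jl.
+(f::AbstractNLPModel)(x) = obj(f, x)
+
+function ProximalAlgorithms.value_and_gradient(f::AbstractNLPModel, x)
+    return obj(f, x), grad(f, x)
+end
+```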
+
+We report the following solver statistics in the table below: the convergence status of each solver (**Status**); the elapsed time $t$ in seconds; the number of evaluations of the smooth objective ($\# f$); the number of evaluations of its gradient ($\# \nabla f$); the number of proximal operator evaluations ($\# \text{prox}$); and the final value of the smooth objective $f$ at the solution (**Objective**).
+
+\input{examples/Benchmark.tex}
+
+* For the **LM** solver, the gradient evaluation count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products.
+
+## Discussion
+
+According to the **Status** column, all methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to a **first-order** stationary point.
+However, the final objective values differ due to the nonconvexity of the problems.
+
+- **SVM with $\ell_{1/2}^{1/2}$ penalty:** **TR** and **R2N** require far fewer function and gradient evaluations than **PANOC**, at the expense of more proximal iterations. Since each proximal step is inexpensive, **TR** and **R2N** are much faster overall.
+- **NNMF with constrained $\ell_0$ penalty:** **TR** outperforms **R2N** in time and evaluations, while **LM** is competitive in terms of function calls but incurs many Jacobian products.
+
+Additional tests (e.g., other regularizers, constraint types, and scaling dimensions) have also been conducted, and a full benchmarking campaign is currently underway.
+
+# Conclusion
+
+The experiments highlight the effectiveness of the solvers implemented in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) compared to **PANOC** from [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl).
+
+On these examples, the performance of the solvers can be summarized as follows:
+
+- **Function and gradient evaluations:** **TR** and **R2N** are the most efficient choices when aiming to minimize both.
+- **Function evaluations only:** **LM** is preferable when the problem is a nonlinear least-squares problem, as it achieves the lowest number of function evaluations.
+- **Proximal iterations:** **PANOC** requires the fewest proximal iterations. However, in most nonsmooth applications, proximal steps are relatively inexpensive, so this criterion is of limited practical relevance.
+
+In the future, the package will be extended with additional algorithms that reduce the number of proximal evaluations, especially when the proximal mapping of $h$ is expensive to compute.
+
+# Acknowledgements
+
+The authors would like to thank Alberto De Marchi for his implementation of the Augmented Lagrangian solver.
+Mohamed Laghdaf Habiboullah is supported by an FRQNT excellence grant.
+Youssef Diouane and Dominique Orban are partially supported by an NSERC Discovery Grant.
+
+# References