diff --git a/.cirrus.yml b/.cirrus.yml deleted file mode 100644 index ad5fb1d9..00000000 --- a/.cirrus.yml +++ /dev/null @@ -1,26 +0,0 @@ -task: - matrix: - - name: FreeBSD - freebsd_instance: - image_family: freebsd-14-2 - env: - matrix: - - JULIA_VERSION: 1 - install_script: | - URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh" - set -x - if [ "$(uname -s)" = "Linux" ] && command -v apt; then - apt update - apt install -y curl - fi - if command -v curl; then - sh -c "$(curl ${URL})" - elif command -v wget; then - sh -c "$(wget ${URL} -q -O-)" - elif command -v fetch; then - sh -c "$(fetch ${URL} -o -)" - fi - build_script: - - cirrusjl build - test_script: - - cirrusjl test diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml index c0e82906..1f6fc01c 100644 --- a/.github/workflows/draft-pdf.yml +++ b/.github/workflows/draft-pdf.yml @@ -21,7 +21,7 @@ jobs: journal: joss # This should be the path to the paper within your repo. paper-path: paper/paper.md - - name: Upload + - name: Upload pdf artifact uses: actions/upload-artifact@v4 with: name: paper @@ -29,3 +29,24 @@ jobs: # PDF. 
Note, this should be the same directory as the input # paper.md path: paper/paper.pdf + - name: Create release + if: github.event_name == 'push' + uses: rymndhng/release-on-push-action@master + id: release + with: + bump_version_scheme: patch + tag_prefix: v + release_body: "" + use_github_release_notes: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Upload PDF to release + if: github.event_name == 'push' + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: paper/paper.pdf + asset_name: joss-draft.pdf + tag: ${{ steps.release.outputs.tag_name }} + overwrite: true + body: "" diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..c68fff95 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +paper/examples/Manifest.toml +paper/jats/paper.jats +paper/jats/jso-packages.pdf +paper/paper.pdf diff --git a/paper/examples/Benchmark.jl b/paper/examples/Benchmark.jl new file mode 100644 index 00000000..661980e1 --- /dev/null +++ b/paper/examples/Benchmark.jl @@ -0,0 +1,371 @@ + +############################# +# ======== IMPORTS ======== # +############################# +using Random, LinearAlgebra +using ProximalOperators, ProximalCore, ProximalAlgorithms +using ShiftedProximalOperators +using NLPModels, NLPModelsModifiers +using RegularizedOptimization, RegularizedProblems +using MLDatasets +using PrettyTables +using LaTeXStrings + + +# Local includes +include("comparison-config.jl") +using .ComparisonConfig: CFG, CFG2 + +############################# +# ===== Helper utils ====== # +############################# + +# Generic config printer (works for both CFG / CFG2) +function print_config(cfg) + println("Configuration:") + for fld in fieldnames(typeof(cfg)) + val = getfield(cfg, fld) + println(rpad(" $(String(fld))", 16), " = ", val) + end +end + +# Common QN selector +function ensure_qn(model, which::Symbol) + which === :LBFGS && return LBFGSModel(model) + which === :LSR1 && return 
LSR1Model(model) + error("Unknown QN: $which (expected :LBFGS or :LSR1)") +end + + +############################# +# ======= SVM bench ======= # +############################# + +# Accuracy for SVM (as in original script) +acc = vec -> length(findall(x -> x < 1, vec)) / length(vec) * 100 + +function run_tr_svm!(model, x0; λ = 1.0, qn = :LSR1, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;)) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, RootNormLhalf(λ)) + solver = TRSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + reset!(qn_model) # Reset counters before timing + reg_nlp = RegularizedNLPModel(qn_model, RootNormLhalf(λ)) + solver = TRSolver(reg_nlp) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "TR ($(String(qn)), SVM)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_r2n_svm!(model, x0; λ = 1.0, qn = :LBFGS, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;)) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, RootNormLhalf(λ)) + solver = R2NSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol,verbose = verbose, sub_kwargs = sub_kwargs) + reset!(qn_model) # Reset counters before timing + reg_nlp = RegularizedNLPModel(qn_model, RootNormLhalf(λ)) # Re-create to reset prox eval count + solver = R2NSolver(reg_nlp) + t = 
@elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "R2N ($(String(qn)), SVM)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_LM_svm!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;)) + reg_nls = RegularizedNLSModel(nls_model, RootNormLhalf(λ)) + solver = LMSolver(reg_nls) + stats = RegularizedExecutionStats(reg_nls) + RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + reset!(nls_model) # Reset counters before timing + reg_nls = RegularizedNLSModel(nls_model, RootNormLhalf(λ)) + solver = LMSolver(reg_nls) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "LM (SVM)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_residual(nls_model), + gevals = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(nls_model, stats.solution) + ) +end + +function run_LMTR_svm!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;)) + reg_nls = RegularizedNLSModel(nls_model, RootNormLhalf(λ)) + solver = LMTRSolver(reg_nls) + stats = RegularizedExecutionStats(reg_nls) + RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + reset!(nls_model) # Reset counters before 
timing + reg_nls = RegularizedNLSModel(nls_model, RootNormLhalf(λ)) + solver = LMTRSolver(reg_nls) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "LMTR (SVM)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_residual(nls_model), + gevals = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(nls_model, stats.solution) + ) +end + +function bench_svm!(cfg = CFG) + Random.seed!(cfg.SEED) + model, nls_train, _ = RegularizedProblems.svm_train_model() + x0 = model.meta.x0 + + results = NamedTuple[] + (:TR in cfg.RUN_SOLVERS) && push!(results, run_tr_svm!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_TR, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N)) + (:R2N in cfg.RUN_SOLVERS) && push!(results, run_r2n_svm!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_R2N, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N)) + (:LM in cfg.RUN_SOLVERS) && push!(results, run_LM_svm!(nls_train, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N)) + (:LMTR in cfg.RUN_SOLVERS) && push!(results, run_LMTR_svm!(nls_train, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N)) + + # Print quick summary + println("\n=== SVM: solver comparison ===") + for m in results + println("\n→ ", m.name) + println(" status = ", m.status) + println(" time (s) = ", round(m.time, digits = 4)) + m.iters !== missing && println(" outer iters = ", m.iters) + println(" # f eval = ", m.fevals) + println(" # ∇f eval = ", m.gevals) + m.proxcalls !== missing && println(" # prox calls = ", 
Int(m.proxcalls)) + println(" final objective= ", round(obj(model, m.solution), digits = 4)) + println(" accuracy (%) = ", round(acc(residual(nls_train, m.solution)), digits = 1)) + end + + println("\nSVM Config:"); print_config(cfg) + + data_svm = [ + (; name=m.name, + status=string(m.status), + time=round(m.time, digits=4), + fe=m.fevals, + ge=m.gevals, + prox = m.proxcalls === missing ? missing : Int(m.proxcalls), + obj = round(obj(model, m.solution), digits=4)) + for m in results + ] + + return data_svm +end + +############################# +# ======= NNMF bench ====== # +############################# + +function run_tr_nnmf!(model, x0; λ = 1.0, qn = :LSR1, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;), selected = nothing) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = RegularizedNLPModel(qn_model, NormL0(λ), selected) + solver = TRSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + reset!(qn_model) # Reset counters before timing + reg_nlp = RegularizedNLPModel(qn_model, NormL0(λ), selected) # Re-create to reset prox eval count + solver = TRSolver(reg_nlp) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "TR ($(String(qn)), NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_r2n_nnmf!(model, x0; λ = 1.0, qn = :LBFGS, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;), σk = 1e5, selected = nothing) + qn_model = ensure_qn(model, qn) + reset!(qn_model) + reg_nlp = 
RegularizedNLPModel(qn_model, NormL0(λ), selected) + solver = R2NSolver(reg_nlp) + stats = RegularizedExecutionStats(reg_nlp) + RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, + sub_kwargs = sub_kwargs) + + reset!(qn_model) # Reset counters before timing + reg_nlp = RegularizedNLPModel(qn_model, NormL0(λ), selected) # Re-create to reset prox eval count + solver = R2NSolver(reg_nlp) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nlp, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, + sub_kwargs = sub_kwargs) + return ( + name = "R2N ($(String(qn)), NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_obj(qn_model), + gevals = neval_grad(qn_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(model, stats.solution) + ) +end + +function run_LM_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, selected = nothing, sub_kwargs = (;)) + reg_nls = RegularizedNLSModel(nls_model, NormL0(λ), selected) + solver = LMSolver(reg_nls) + stats = RegularizedExecutionStats(reg_nls) + RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + reset!(nls_model) # Reset counters before timing + reg_nls = RegularizedNLSModel(nls_model, NormL0(λ), selected) + solver = LMSolver(reg_nls) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "LM (NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_residual(nls_model), + gevals = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = 
stats.solution, + final_obj = obj(nls_model, stats.solution) + ) +end + +function run_LMTR_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, selected = nothing, sub_kwargs = (;)) + reg_nls = RegularizedNLSModel(nls_model, NormL0(λ), selected) + solver = LMTRSolver(reg_nls) + stats = RegularizedExecutionStats(reg_nls) + RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + reset!(nls_model) # Reset counters before timing + reg_nls = RegularizedNLSModel(nls_model, NormL0(λ), selected) + solver = LMTRSolver(reg_nls) + t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats; + x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs) + return ( + name = "LMTR (NNMF)", + status = string(stats.status), + time = t, + iters = get(stats.solver_specific, :outer_iter, missing), + fevals = neval_residual(nls_model), + gevals = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model), + proxcalls = get(stats.solver_specific, :prox_evals, missing), + solution = stats.solution, + final_obj = obj(nls_model, stats.solution) + ) +end + +function bench_nnmf!(cfg = CFG2; m = 100, n = 50, k = 5) + Random.seed!(cfg.SEED) + + model, nls_model, _, selected = nnmf_model(m, n, k) + + # build x0 on positive orthant as original + x0 = max.(rand(model.meta.nvar), 0.0) + + # heuristic lambda (copied logic) + cfg.LAMBDA_L0 = norm(grad(model, rand(model.meta.nvar)), Inf) / 200 + + results = NamedTuple[] + (:TR in cfg.RUN_SOLVERS) && push!(results, run_tr_nnmf!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_TR, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N, selected = selected)) + (:R2N in cfg.RUN_SOLVERS) && push!(results, run_r2n_nnmf!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_R2N, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N, selected = selected)) + (:LM in 
cfg.RUN_SOLVERS) && push!(results, run_LM_nnmf!(nls_model, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, selected = selected, sub_kwargs = cfg.SUB_KWARGS_R2N)) + (:LMTR in cfg.RUN_SOLVERS) && push!(results, run_LMTR_nnmf!(nls_model, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, selected = selected, sub_kwargs = cfg.SUB_KWARGS_R2N)) + + println("\n=== NNMF: solver comparison ===") + for m in results + println("\n→ ", m.name) + println(" status = ", m.status) + println(" time (s) = ", round(m.time, digits = 4)) + m.iters !== missing && println(" outer iters = ", m.iters) + println(" # f eval = ", m.fevals) + println(" # ∇f eval = ", m.gevals) + m.proxcalls !== missing && println(" # prox calls = ", Int(m.proxcalls)) + println(" final objective= ", round(obj(model, m.solution), digits = 4)) + end + + println("\nNNMF Config:"); print_config(cfg) + + data_nnmf = [ + (; name=m.name, + status=string(m.status), + time=round(m.time, digits=4), + fe=m.fevals, + ge=m.gevals, + prox = m.proxcalls === missing ? 
missing : Int(m.proxcalls), + obj = round(m.final_obj, digits=4)) + for m in results + ] + + return data_nnmf +end + +# ############################# +# # ========= Main ========== # +# ############################# + +function main(latex_out = false) + data_svm = bench_svm!(CFG) + data_nnmf = bench_nnmf!(CFG2) + + all_data = vcat(data_svm, data_nnmf) + + println("\n=== Full Benchmark Table ===") + # what is inside the table + for row in all_data + println(row) + end + + # save as latex format + if latex_out + + table_str = pretty_table(String, all_data; + header = ["Method", "Status", L"$t$($s$)", L"$\#f$", L"$\#\nabla f$", L"$\#prox$", "Objective"], + backend = Val(:latex), + alignment = [:l, :c, :r, :r, :r, :r, :r], + ) + + open("Benchmark.tex", "w") do io + write(io, table_str) + end + end + return nothing +end + diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex new file mode 100644 index 00000000..41685108 --- /dev/null +++ b/paper/examples/Benchmark.tex @@ -0,0 +1,13 @@ +\begin{tabular}{lcrrrrr} + \hline + \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline + TR (LSR1, SVM) & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\ + R2N (LSR1, SVM) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\ + LM (SVM) & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\ + LMTR (SVM) & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\ + \hline + TR (LBFGS, NNMF) & first\_order & 0.1014 & 42 & 40 & 3160 & 976.06 \\ + R2N (LBFGS, NNMF) & first\_order & 0.4913 & 169 & 107 & 17789 & 411.727 \\ + LM (NNMF) & first\_order & 0.1157 & 14 & 7042 & 2601 & 131.184 \\ + LMTR (NNMF) & first\_order & 0.0697 & 9 & 4066 & 1435 & 131.186 \\\hline +\end{tabular} diff --git a/paper/examples/Project.toml b/paper/examples/Project.toml new file mode 100644 index 00000000..dfe38a1f --- /dev/null +++ b/paper/examples/Project.toml @@ -0,0 +1,10 @@ +[deps] +MLDatasets = 
"eb30cadb-4394-5ae3-aed4-317e484a6458" +NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" +PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" +ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b" +ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" +RegularizedProblems = "ea076b23-609f-44d2-bb12-a4ae45328278" +ShiftedProximalOperators = "d4fd37fa-580c-4e43-9b30-361c21aae263" diff --git a/paper/examples/comparison-config.jl b/paper/examples/comparison-config.jl new file mode 100644 index 00000000..b4b81044 --- /dev/null +++ b/paper/examples/comparison-config.jl @@ -0,0 +1,22 @@ +module ComparisonConfig + +Base.@kwdef mutable struct Config + SEED::Int = 1234 + LAMBDA_L0::Float64 = 1.0 + TOL::Float64 = 1e-4 + RTOL::Float64 = 1e-4 + MAXIT_PANOC::Int = 10000 + VERBOSE_PANOC::Bool = false + VERBOSE_RO::Int = 0 + RUN_SOLVERS::Vector{Symbol} = [:LMTR, :LM, :TR, :R2N] # mutable + QN_FOR_TR::Symbol = :LSR1 + QN_FOR_R2N::Symbol = :LBFGS + SUB_KWARGS_R2N::NamedTuple = (; max_iter = 200) + PRINT_TABLE::Bool = true +end + +# One global, constant *binding* to a mutable object = type stable & editable +const CFG = Config(QN_FOR_R2N=:LSR1) +const CFG2 = Config(QN_FOR_TR = :LBFGS) + +end # module \ No newline at end of file diff --git a/paper/examples/example1.jl b/paper/examples/example1.jl new file mode 100644 index 00000000..4e3b928f --- /dev/null +++ b/paper/examples/example1.jl @@ -0,0 +1,13 @@ +using LinearAlgebra, Random, ProximalOperators +using NLPModels, RegularizedProblems, RegularizedOptimization +using MLDatasets + +Random.seed!(1234) +model, nls_model, _ = RegularizedProblems.svm_train_model() # Build SVM model +f = LSR1Model(model) # L-SR1 Hessian approximation +λ = 1.0 # Regularization parameter +h = RootNormLhalf(λ) # Nonsmooth term +reg_nlp = RegularizedNLPModel(f, h) # Regularized problem +solver = R2NSolver(reg_nlp) # Choose solver 
+stats = RegularizedExecutionStats(reg_nlp) +solve!(solver, reg_nlp, stats; atol=1e-4, rtol=1e-4, verbose=1, sub_kwargs=(max_iter=200,)) diff --git a/paper/paper.bib b/paper/paper.bib index e69de29b..37da40fd 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -0,0 +1,161 @@ +@Article{ aravkin-baraldi-orban-2022, + Author = {A. Y. Aravkin and R. Baraldi and D. Orban}, + Title = {A Proximal Quasi-{N}ewton Trust-Region Method for Nonsmooth Regularized Optimization}, + Journal = siopt, + Year = 2022, + Volume = 32, + Number = 2, + Pages = {900--929}, + doi = {10.1137/21M1409536}, +} + +@Article{ aravkin-baraldi-orban-2024, + Author = {A. Y. Aravkin and R. Baraldi and D. Orban}, + Title = {A {L}evenberg–{M}arquardt Method for Nonsmooth Regularized Least Squares}, + Journal = sisc, + Year = 2024, + Volume = 46, + Number = 4, + Pages = {A2557--A2581}, + doi = {10.1137/22M1538971}, +} + +@Software{ leconte_linearoperators_jl_linear_operators_2023, + Author = {Leconte, Geoffroy and Orban, Dominique and Soares Siqueira, Abel and contributors}, + license = {MPL-2.0}, + Title = {{LinearOperators.jl: Linear Operators for Julia}}, + url = {https://github.com/JuliaSmoothOptimizers/LinearOperators.jl}, + version = {2.6.0}, + Year = 2023, +} + +@Article{ leconte-orban-2023, + Author = {G. Leconte and D. Orban}, + Title = {The Indefinite Proximal Gradient Method}, + Journal = coap, + Year = 2025, + Volume = 91, + Number = 2, + Pages = 861--903, + doi = {10.1007/s10589-024-00604-5}, +} + +@TechReport{ leconte-orban-2023-2, + Author = {G. Leconte and D. 
Orban}, + Title = {Complexity of trust-region methods with unbounded {H}essian approximations for smooth and nonsmooth optimization}, + Institution = gerad, + Year = 2023, + Type = {Cahier}, + Number = {G-2023-65}, + Address = gerad-address, + url = {https://www.gerad.ca/fr/papers/G-2023-65}, +} + +@TechReport{ diouane-habiboullah-orban-2024, + Author = {Youssef Diouane and Mohamed Laghdaf Habiboullah and Dominique Orban}, + Title = {A proximal modified quasi-Newton method for nonsmooth regularized optimization}, + Institution = {GERAD}, + Year = 2024, + Type = {Cahier}, + Number = {G-2024-64}, + Address = {Montr\'eal, Canada}, + doi = {10.48550/arxiv.2409.19428}, + url = {https://www.gerad.ca/fr/papers/G-2024-64}, +} + +@TechReport{ diouane-gollier-orban-2024, + Author = {Youssef Diouane and Maxence Gollier and Dominique Orban}, + Title = {A nonsmooth exact penalty method for equality-constrained optimization: complexity and implementation}, + Institution = {GERAD}, + Year = 2024, + Type = {Cahier}, + Number = {G-2024-65}, + Address = {Montr\'eal, Canada}, + doi = {10.13140/RG.2.2.16095.47527}, +} + +@Misc{orban-siqueira-cutest-2020, + author = {D. Orban and A. S. Siqueira and {contributors}}, + title = {{CUTEst.jl}: {J}ulia's {CUTEst} interface}, + month = {October}, + url = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl}, + year = {2020}, + DOI = {10.5281/zenodo.1188851}, +} + +@Misc{orban-siqueira-nlpmodels-2020, + author = {D. Orban and A. S. Siqueira and {contributors}}, + title = {{NLPModels.jl}: Data Structures for Optimization Models}, + month = {July}, + url = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl}, + year = {2020}, + DOI = {10.5281/zenodo.2558627}, +} + +@Misc{jso, + author = {T. Migot and D. Orban and A. S. 
Siqueira}, + title = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization}, + year = {2021}, + url = {https://juliasmoothoptimizers.github.io/}, + doi = {10.5281/zenodo.2655082}, +} + +@Misc{migot-orban-siqueira-optimizationproblems-2023, + author = {T. Migot and D. Orban and A. S. Siqueira}, + title = {OptimizationProblems.jl: A collection of optimization problems in Julia}, + year = {2023}, + doi = {10.5281/zenodo.3672094}, + url = {https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl}, +} + +@techreport{kim-park-2008, + title = {Sparse Nonnegative Matrix Factorization for Clustering}, + author = {Jingu Kim and Haesun Park}, + institution = {Georgia Inst. of Technology}, + number = {GT-CSE-08-01}, + year = {2008}, + url = {http://hdl.handle.net/1853/20058}, +} + +@InProceedings{ stella-themelis-sopasakis-patrinos-2017, + Author = {L. {Stella} and A. {Themelis} and P. {Sopasakis} and P. {Patrinos}}, + Title = {A simple and efficient algorithm for nonlinear model predictive control}, + Booktitle = {2017 IEEE 56th Annual Conference on Decision and Control (CDC)}, + Year = 2017, + Pages = {1939--1944}, + doi = {10.1109/CDC.2017.8263933}, +} + +@article{demarchi-jia-kanzow-mehlitz-2023, + author = {De~Marchi, Alberto and Jia, Xiaoxi and Kanzow, Christian and Mehlitz, Patrick}, + title = {Constrained composite optimization and augmented {L}agrangian methods}, + journal = {Mathematical Programming}, + year = {2023}, + month = {9}, + volume = {201}, + number = {1}, + pages = {863--896}, + doi = {10.1007/s10107-022-01922-4}, +} + +@Article{ themelis-stella-patrinos-2017, + Author = {Themelis, Andreas and Stella, Lorenzo and Patrinos, Panagiotis}, + Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone line search Algorithms}, + Journal = siopt, + Year = 2018, + Volume = 28, + Number = 3, + Pages = {2274--2303}, + doi = {10.1137/16M1080240}, +} + +@article{eckstein1992douglas, + 
title={On the Douglas—Rachford splitting method and the proximal point algorithm for maximal monotone operators}, + author={Eckstein, Jonathan and Bertsekas, Dimitri P}, + journal={Mathematical programming}, + volume={55}, + number={1}, + pages={293--318}, + year={1992}, + publisher={Springer} +} diff --git a/paper/paper.md b/paper/paper.md index 03c2d9a0..35f63522 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -1,5 +1,5 @@ --- -title: 'RegularizedOptimization.jl: A Julia framework for regularization-based nonlinear optimization' +title: 'RegularizedOptimization.jl: A Julia framework for regularized and nonsmooth optimization' tags: - Julia - nonsmooth optimization @@ -30,4 +30,181 @@ header-includes: | \setmonofont[Path = ./, Scale=0.68]{JuliaMono-Regular.ttf} --- -# References \ No newline at end of file +# Summary + +[RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) is a Julia package that implements families of quadratic regularization and trust-region methods for solving the nonsmooth optimization problem +\begin{equation}\label{eq:nlp} + \underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x) \quad \text{subject to} \quad c(x) = 0, +\end{equation} +where $f: \mathbb{R}^n \to \mathbb{R}$ and $c: \mathbb{R}^n \to \mathbb{R}^m$ are continuously differentiable, and $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous. +The nonsmooth objective $h$ can be a *regularizer* such as a sparsity-inducing penalty, model simple constraints such as $x$ belonging to a simple convex set, or be a combination of both. +All $f$, $h$ and $c$ can be nonconvex. +Together with the companion library [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) described below, RegularizedOptimization.jl provides a modular and extensible framework for solving \eqref{eq:nlp}, and developing novel solvers. 
+Currently, the following solvers are implemented: + +- **Trust-region solvers TR and TRDH** [@aravkin-baraldi-orban-2022;@leconte-orban-2023] +- **Quadratic regularization solvers R2, R2DH and R2N** [@diouane-habiboullah-orban-2024;@aravkin-baraldi-orban-2022] +- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024] used when $f$ is a least-squares residual +- **Augmented Lagrangian solver AL** [@demarchi-jia-kanzow-mehlitz-2023]. + +All solvers rely on first derivatives of $f$ and $c$, and optionally on their second derivatives in the form of Hessian-vector products. +If second derivatives are not available or too costly to compute, quasi-Newton approximations can be used. +In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. +At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. +The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}. +All solvers have a non-monotone mode that enhance performance in practice on certain problems [@leconte-orban-2023;@diouane-habiboullah-orban-2024]. +All are implemented in an in-place fashion, so that re-solves incur no allocations. +To illustrate our claim of extensibility, a first version of the AL solver was implemented and submitted by an external contributor. + + + + + + + + +# Statement of need + +## Model-based framework for nonsmooth methods + +In Julia, \eqref{eq:nlp} can be solved using [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements splitting schemes and line-search–based methods [@stella-themelis-sopasakis-patrinos-2017;@themelis-stella-patrinos-2017]. 
+Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the gradient of $f$ modified by a L-BFGS Quasi-Newton approximation, followed by proximal steps on the nonsmooth part $h$. + +By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based trust-region and quadratic regularization methods, which typically require fewer evaluations of $f$ and its gradient than first-order line search methods, at the expense of more evaluations of proximal operators [@aravkin-baraldi-orban-2022]. +However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints (see examples below), so that the overall approach is efficient for large-scale problems. + +When computing a step by (approximately) minimizing a model, [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) implements efficient allocation-free shifted proximal mappings. +Specifically, it supports shifted proximal operators of the form +$$ + \underset{t \in \mathbb{R}^n}{\arg\min} \, { \tfrac{1}{2} \|t - q\|_2^2 + \nu \psi(t + s; x) + \chi(s + t \mid \Delta \mathbb{B})} +$$ +where $q$ is given, $x$ and $s$ are fixed shifts, $\chi(\cdot \mid \Delta \mathbb{B})$ is the indicator of a ball of radius $\Delta > 0$ defined by a certain norm, and $\psi(\cdot; x)$ is a model of $h$ about $x$. +It is common to set $\psi(t + s; x) = h(x + s + t)$. + +These shifted operators allow to (i) incorporate bound or trust-region constraints via the indicator, which is required for the **TR** and **TRDH** solvers, and (ii) evaluate the above in place, without additional allocations, which is currently not possible with ProximalOperators.jl. + +RegularizedOptimization.jl provides a consistent API to formulate optimization problems and apply different solvers. 
+It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking. + +The smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems. +Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023], but a user can easily interface or model their own smooth objective. + +The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl), which provides a broad collection of regularizers and indicators of simple sets. + +With $f$ and $h$ modeled as discussed above, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a straightforward way to pair them into a *Regularized Nonlinear Programming Model* + +```julia +reg_nlp = RegularizedNLPModel(f, h) +``` + +They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, as would be the case with the **LM** and **LMTR** solvers. + +```julia +reg_nls = RegularizedNLSModel(f, h) +``` + +RegularizedProblems.jl also provides a set of instances commonly used in data science and in the nonsmooth optimization literature, where several choices of $f$ can be paired with various nonsmooth terms $h$. 
+This design makes for a convenient source of reproducible problem instances for testing and benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
+
+## Support for both exact and approximate Hessians
+
+In contrast with [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), methods in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) such as **R2N** and **TR** support exact Hessians as well as several Hessian approximations of $f$.
+Hessian–vector products $v \mapsto Hv$ can be obtained via automatic differentiation through [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl) or implemented manually.
+Limited-memory and diagonal quasi-Newton approximations can be selected from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl).
+This design allows solvers to exploit second-order information without explicitly forming dense or sparse Hessians, which is often expensive in time and memory, particularly at large scale.
+
+## Testing and documentation
+
+The package includes a comprehensive suite of unit tests that cover all functionalities, ensuring reliability and correctness.
+Extensive documentation is provided, including a user guide, API reference, and examples to help users get started quickly.
+Documentation is built using Documenter.jl.
+
+## Application
+
+A novel implementation of the exact penalty approach [@diouane-gollier-orban-2024] for equality-constrained smooth optimization is being developed based on RegularizedOptimization.jl.
+In it, $h(x) = \|c(x)\|$ and the model $\psi(\cdot; x)$ differs from $h$ itself.
+Specifically, $\psi(\cdot; x)$ is the norm of a linearization of $c$ about $x$.
+This is not covered in the current version of [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl).
+
+# Examples
+
+We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on two nonsmooth and nonconvex problems:
+
+- **Support Vector Machine (SVM) with $\ell_{1/2}^{1/2}$ penalty** for image classification [@aravkin-baraldi-orban-2024].
+- **Nonnegative Matrix Factorization (NNMF) with $\ell_0$ penalty and bound constraints** [@kim-park-2008].
+
+Below is a condensed example showing how to define and solve the SVM problem, and perform a solve followed by a re-solve:
+
+```julia
+using LinearAlgebra, Random, ProximalOperators
+using NLPModels, RegularizedProblems, RegularizedOptimization
+using MLDatasets
+
+Random.seed!(1234)
+model, nls_model, _ = RegularizedProblems.svm_train_model() # Build SVM model
+f = LSR1Model(model) # L-SR1 Hessian approximation
+λ = 1.0 # Regularization parameter
+h = RootNormLhalf(λ) # Nonsmooth term
+reg_nlp = RegularizedNLPModel(f, h) # Regularized problem
+solver = R2NSolver(reg_nlp) # Choose solver
+stats = RegularizedExecutionStats(reg_nlp)
+solve!(solver, reg_nlp, stats; atol=1e-4, rtol=1e-4, verbose=1, sub_kwargs=(max_iter=200,))
+solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_iter=200,))
+```
+
+The NNMF problem can be set up in a similar fashion:
+
+```julia
+Random.seed!(1234)
+m, n, k = 100, 50, 5
+model, nls_model, _, selected = nnmf_model(m, n, k) # Build NNMF model
+x0 = rand(model.meta.nvar) # Initial point
+λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 200 # Regularization parameter
+h = NormL0(λ) # Nonsmooth term
+reg_nls = RegularizedNLSModel(nls_model, h) # Regularized problem for LM
+solver = LMSolver(reg_nls) # Choose solver
+```
+
+## Numerical results
+
+We compare **TR**, **R2N**, **LM** and **LMTR** from our library.
+
+We report the following solver statistics in the table: the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value.
+On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively.
+The subproblem solver is **R2**.
+
+\input{examples/Benchmark.tex}
+
+- Note that for the **LM** and **LMTR** solvers, the gradient evaluation count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products.
+
+All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point.
+Note that the final objective values differ due to the nonconvexity of the problems.
+
+- **SVM with $\ell_{1/2}^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest gradient evaluations compared to all the other solvers.
+However, it requires more proximal evaluations, but these are inexpensive.
+**LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest.
+Note that here, **LMTR** achieves the lowest objective value.
+- **NNMF with constrained $\ell_0$ penalty:** **LMTR** is the fastest, and requires fewer function evaluations than all the other solvers. It is followed by **TR**, which is the second fastest and requires the fewest gradient evaluations; however, it achieves the highest objective value.
+Note that both **LMTR** and **LM** achieve the lowest objective value.
+
+Additional tests (e.g., other regularizers, constraint types, and scaling dimensions) have also been conducted, and a full benchmarking campaign is currently underway.
+
+# Conclusion
+
+The experiments highlight the effectiveness of the solvers implemented in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
+
+
+
+
+
+
+
+In ongoing research, the package will be extended with algorithms that reduce the number of proximal evaluations, especially when the proximal mapping of $h$ is expensive to compute.
+
+# Acknowledgements
+
+The authors would like to thank Alberto Demarchi for his implementation of the Augmented Lagrangian solver.
+Mohamed Laghdaf Habiboullah is supported by an excellence FRQNT grant.
+Youssef Diouane, Maxence Gollier and Dominique Orban are partially supported by an NSERC Discovery Grant.
+
+# References