diff --git a/Project.toml b/Project.toml index 02f46a80..a6a04161 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" @@ -15,6 +16,7 @@ SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" [compat] +LinearOperators = "2.5.2" NLPModels = "0.19" NLPModelsModifiers = "0.6" ProximalOperators = "0.15" diff --git a/benchmarks/tables/bpdn-constr-table.jl b/benchmarks/tables/bpdn-constr-table.jl new file mode 100644 index 00000000..419901c5 --- /dev/null +++ b/benchmarks/tables/bpdn-constr-table.jl @@ -0,0 +1,29 @@ +include("regulopt-tables.jl") + +# model +Random.seed!(1234) +compound = 1 +model, nls_model, sol = bpdn_model(compound, bounds = true) + +f = LSR1Model(model) +λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10 +h = NormL1(λ) + +verbose = 0 # 10 +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 20 +options = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options6 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options5 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options7 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options7, options2, options3, options4] # n'importe lequel si subsolver = :None + +benchmark_table(f, 1:f.meta.nvar, sol, h, λ, solvers, subsolvers, solver_options, subsolver_options, "BPDN-cstr") \ No newline at end of file diff --git a/benchmarks/tables/bpdn-table.jl b/benchmarks/tables/bpdn-table.jl new file mode 100644 index 00000000..9c896d0d --- /dev/null +++ b/benchmarks/tables/bpdn-table.jl @@ -0,0 +1,31 @@ +include("regulopt-tables.jl") + +# model +Random.seed!(1234) +compound = 1 +model, nls_model, sol = bpdn_model(compound, bounds = false) + +# parameters +f = LSR1Model(model) +λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10 +h = NormL1(λ) + +verbose = 0 # 10 +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 40 +options = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options6 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options5 = ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options7 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner, reduce_TR = false) +options8 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options7, options2, options3, options4, options8] # n'importe lequel si subsolver = :None + +benchmark_table(f, 1:f.meta.nvar, sol, h, λ, solvers, subsolvers, solver_options, subsolver_options, "BPDN") \ No newline at end of file diff --git a/benchmarks/tables/fh-table.jl b/benchmarks/tables/fh-table.jl new file mode 100644 index 00000000..dacbd84c --- /dev/null +++ b/benchmarks/tables/fh-table.jl @@ -0,0 +1,32 @@ +include("regulopt-tables.jl") +using ADNLPModels, DifferentialEquations + +Random.seed!(1234) +data, simulate, resid, misfit = RegularizedProblems.FH_smooth_term() +model = ADNLPModel(misfit, ones(5)) +f = LBFGSModel(model) + +λ = 1.0 +h = NormL0(λ) +ν = 1.0e2 +verbose = 0 #10 +maxIter = 1000 +maxIter_sub = 200 # max iter for subsolver +ϵ = 1.0e-4 +options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options5 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options8 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options8, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options2, options7, options2, options3, options4] +subset = 2:10 # issues with R2 alone + +benchmark_table(f, 1:f.meta.nvar, [], h, λ, solvers[subset], subsolvers[subset], solver_options[subset], subsolver_options[subset], + "FH with ν = $ν, λ = $λ") diff --git a/benchmarks/tables/nnmf-table.jl b/benchmarks/tables/nnmf-table.jl new file mode 100644 index 00000000..7f6b1a61 --- /dev/null +++ b/benchmarks/tables/nnmf-table.jl @@ -0,0 +1,29 @@ +include("regulopt-tables.jl") + +Random.seed!(1234) +m, n, k = 100, 50, 5 +model, A, selected = nnmf_model(m, n, k) +f = LSR1Model(model) +λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100 +h = NormL0(λ) +ν = 1.0 +ϵ = 1.0e-5 +maxIter = 500 +maxIter_inner = 40 +verbose = 0 #10 +options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options5 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options8 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false, reduce_TR = false) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options8, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options2, options7, options2, options3, options4] + +benchmark_table(f, selected, [], h, λ, solvers, subsolvers, solver_options, subsolver_options, + "NNMF with m = $m, n = $n, k = $k, ν = $ν, λ = $λ") \ No newline at end of file diff --git a/benchmarks/tables/regulopt-tables.jl b/benchmarks/tables/regulopt-tables.jl new file mode 100644 index 00000000..674698b9 --- /dev/null +++ b/benchmarks/tables/regulopt-tables.jl @@ -0,0 +1,157 @@ +using PrettyTables +using Random +using LinearAlgebra +using ProximalOperators +using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization, ShiftedProximalOperators, SolverBenchmark +using Printf + +# utils for extracting stats / display table +modelname(nlp::LSR1Model) = "LSR1" +modelname(nlp::LBFGSModel) = "LBFGS" +modelname(nlp::SpectralGradientModel) = "SpectralGradient" +modelname(nlp::DiagonalQNModel) = "DiagonalQN" +subsolvername(subsolver::Symbol) = subsolver == :None ? "" : string("-", subsolver) +function options_str(options::ROSolverOptions, solver::Symbol, subsolver_options::ROSolverOptions, subsolver::Symbol) + if solver == :TRDH + out_str = !options.spectral ? (options.psb ? "-DiagQN-PSB" : "-DiagQN-Andrei") : "-Spectral" + out_str = (options.reduce_TR) ? out_str : string(out_str, "-noredTR") + elseif solver == :TR && subsolver == :TRDH + out_str = !subsolver_options.spectral ? (subsolver_options.psb ? "-DiagQN-PSB" : "-DiagQN-Andrei") : "-Spectral" + out_str = (subsolver_options.reduce_TR) ? out_str : string(out_str, "-noredTR") + else + out_str = "" + end + return out_str +end +grad_evals(nlp::AbstractNLPModel) = neval_grad(nlp) +grad_evals(nls::AbstractNLSModel) = neval_jtprod_residual(nls) + neval_jprod_residual(nls) +function nb_prox_evals(stats, solver::Symbol) + if solver ∈ [:TR, :R2, :TRDH] + prox_evals = sum(stats.solver_specific[:SubsolverCounter]) + else + error("not implemented") + end + return prox_evals +end + +function benchmark_table( + f::AbstractNLPModel, + selected, + sol, + h, + λ, + solvers, + subsolvers, + solver_options, + subsolver_options, + pb_name::String, +) + + row_names = ["$(solver)$(subsolvername(subsolver))$(options_str(opt, solver, subsolver_opt, subsolver))" + for (solver, opt, subsolver, subsolver_opt) in zip(solvers, solver_options, subsolvers, subsolver_options)] + + n∇f_evals = [] + nprox_evals = [] + solver_stats = [] + + for (solver, subsolver, opt, sub_opt) in zip(solvers, subsolvers, solver_options, subsolver_options) + @info " using $solver with subsolver = $subsolver" + args = solver == :R2 ? () : (NormLinf(1.0),) + if subsolver == :None + solver_out = eval(solver)(f, h, args..., opt, x0 = f.meta.x0, selected = selected) + else + solver_out = eval(solver)(f, h, args..., opt, x0 = f.meta.x0, subsolver = eval(subsolver), + subsolver_options = sub_opt, selected = selected) + end + push!(n∇f_evals, grad_evals(f)) + push!(nprox_evals, nb_prox_evals(solver_out, solver)) + push!(solver_stats, solver_out) + reset!(f) + + end + + if length(sol) == 0 + header = [ + "f(x)", + "h(x)/λ", + "ξ", + "∇f evals", + "prox calls", + ] + else + header = [ + "f(x) (true = $(round(obj(model, sol); sigdigits = 4)))", + "h(x)/λ", + "ξ", + "||x-x*||/||x*||", + "∇f evals", + "prox calls", + ] + end + + n_solvers = length(row_names) + data = Matrix{Any}(undef, n_solvers, length(header)) + for i=1:n_solvers + solver_out = solver_stats[i] + x = solver_out.solution + fx = solver_out.solver_specific[:Fhist][end] + hx = solver_out.solver_specific[:Hhist][end] + ξ = solver_out.dual_feas + n∇f = n∇f_evals[i] + nprox = nprox_evals[i] + if length(sol) == 0 + data[i, :] .= [fx, hx / λ, ξ, n∇f, nprox] + else + err = norm(x - sol) / norm(sol) + data[i, :] .= [fx, hx / λ, ξ, err, n∇f, nprox] + end + end + + if length(sol) == 0 + print_formats = ft_printf(["%7.3e", "%7.1e", "%7.1e", "%i","%i"], 1:length(header)) + else + print_formats = ft_printf(["%7.3e", "%7.1e", "%7.1e", "%7.3e", "%i","%i"], 1:length(header)) + end + + return pretty_table(data; + header = header, + row_names= row_names, + title = "$pb_name $(modelname(f)) $(typeof(h).name.name)", + # backend = Val(:latex), + formatters = (print_formats, + # (v, i, j) -> (SolverBenchmark.safe_latex_AbstractFloat(v)), + ), + ) +end + + + +# λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100000 +# h = NormL1(λ) +# benchmark_table(f, selected, [], h, λ, solvers, subsolvers, solver_options, subsolver_options, +# "NNMF with m = $m, n = $n, k = $k, ν = 1.0e-3,") + + + + +# header = ["TR LSR1 L0Box", "R2 LSR1 L0Box", "LM L0Box", "LMTR L0Box"] +# TR_out = TR(f, h, χ, options, x0 = f.meta.x0) +# n∇f_TR = neval_grad(f) +# prox_evals_TR = sum(TR_out.solver_specific[:SubsolverCounter]) +# reset!(f) +# R2_out = R2(f, h, options, x0 = f.meta.x0) +# n∇f_R2 = neval_grad(f) +# prox_evals_R2 = R2_out.iter +# reset!(f) +# LM_out = LM(nls_model, h, options, x0 = nls_model.meta.x0) +# n∇f_LM = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model) +# prox_evals_LM = sum(LM_out.solver_specific[:SubsolverCounter]) +# reset!(nls_model) +# LMTR_out = LMTR(nls_model, h, χ, options, x0 = nls_model.meta.x0) +# n∇f_LMTR = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model) +# prox_evals_LMTR = sum(LMTR_out.solver_specific[:SubsolverCounter]) +# reset!(nls_model) +# n∇f_evals = [n∇f_TR, n∇f_R2, n∇f_LM, n∇f_LMTR] +# nprox_evals = [prox_evals_TR, prox_evals_R2, prox_evals_LM, prox_evals_LMTR] + +# solver_stats = [TR_out, R2_out, LM_out, LMTR_out] \ No newline at end of file diff --git a/benchmarks/tables/svm-table.jl b/benchmarks/tables/svm-table.jl new file mode 100644 index 00000000..05be38eb --- /dev/null +++ b/benchmarks/tables/svm-table.jl @@ -0,0 +1,32 @@ +include("regulopt-tables.jl") +using MLDatasets + +Random.seed!(1234) +nlp_train, nls_train, sol_train = RegularizedProblems.svm_train_model() +nlp_test, nls_test, sol_test = RegularizedProblems.svm_test_model() +f = LSR1Model(nlp_train) +f_test = LSR1Model(nlp_test) +λ = 10.0 #norm(grad(model, rand(model.meta.nvar)), Inf) / 10 +h = NormL1(λ) + +ν = 1.0e0 +verbose = 0 #10 +ϵ = 1.0e-6 +maxIter = 500 +maxIter_inner = 40 +options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) +options5 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true) +options6 = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = false) +options7 = ROSolverOptions(spectral = false, psb = true, reduce_TR = false, ϵa = ϵ, ϵr = ϵ, maxIter=maxIter_inner) + +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [options, options, options5, options6, options, options, options, options, options] +subsolver_options = [options2, options2, options2, options2, options2, options2, options7, options3, options4] +subset = 1:9 + +benchmark_table(f, 1:f.meta.nvar, [], h, λ, solvers[subset], subsolvers[subset], solver_options[subset], subsolver_options[subset], + "SVM with ν = $ν, λ = $λ") \ No newline at end of file diff --git a/examples/demo-bpdn-constr.jl b/examples/demo-bpdn-constr.jl index c4ccb9d6..21559562 100644 --- a/examples/demo-bpdn-constr.jl +++ b/examples/demo-bpdn-constr.jl @@ -9,7 +9,7 @@ include("plot-utils-bpdn.jl") Random.seed!(1234) function demo_solver(f, nls, sol, h, χ, suffix = "l0-linf") - options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10) + options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, spectral = false, psb = true) @info " using TR to solve with" h χ reset!(f) @@ -22,6 +22,12 @@ function demo_solver(f, nls, sol, h, χ, suffix = "l0-linf") R2_out = R2(f, h, options, x0 = f.meta.x0) @info "R2 relative error" norm(R2_out.solution - sol) / norm(sol) plot_bpdn(R2_out, sol, "constr-r2-$(suffix)") + + @info " using TRDH to solve with" h χ + reset!(f) + TRDH_out = TRDH(f, h, χ, options, x0 = f.meta.x0) + @info "TRDH relative error" norm(TRDH_out.solution - sol) / norm(sol) + # plot_bpdn(TRDH_out, sol, "constr-trdh-$(suffix)") @info " using LMTR to solve with" h χ reset!(nls) diff --git a/examples/demo-nnmf-constr.jl b/examples/demo-nnmf-constr.jl index 334f936a..91f74071 100644 --- a/examples/demo-nnmf-constr.jl +++ b/examples/demo-nnmf-constr.jl @@ -9,16 +9,32 @@ include("plot-utils-nnmf.jl") Random.seed!(1234) function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf") - options = ROSolverOptions(ν = 1.0e-3, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, maxIter = 500) + options = ROSolverOptions(ν = 1.0e-3, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, maxIter = 500, spectral = true) @info " using TR to solve with" h χ reset!(f) TR_out = TR(f, h, χ, options, selected = selected) - plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") + # plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") @info " using R2 to solve with" h reset!(f) R2_out = R2(f, h, options, selected = selected) - plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") + # plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") + + @info " using TR with R2 as subproblem to solve with" h χ + reset!(f) + TR_out = TR(f, h, χ, options, selected = selected) + # plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") + + subsolver_options = ROSolverOptions(spectral = false, psb = true) + @info " using TR with TRDH as subproblem to solve with" h χ + reset!(f) + TR2_out = TR(f, h, χ, options, selected = selected, subsolver = TRDH, subsolver_options = subsolver_options) + # plot_nnmf(TR2_out, Avec, m, n, k, "tr-trdh-$suffix") + + @info " using TRDH to solve with" h χ + reset!(f) + TRDH_out = TRDH(f, h, χ, options, selected = selected) + # plot_nnmf(TRDH_out, Avec, m, n, k, "trdh-$suffix") end function demo_nnmf() diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index d71d32d2..458a8cce 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -7,7 +7,7 @@ using LinearAlgebra, Logging, Printf using ProximalOperators, TSVD # dependencies from us -using NLPModels, NLPModelsModifiers, ShiftedProximalOperators, SolverCore +using LinearOperators, NLPModels, NLPModelsModifiers, ShiftedProximalOperators, SolverCore include("utils.jl") include("input_struct.jl") @@ -15,6 +15,7 @@ include("PG_alg.jl") include("Fista_alg.jl") include("splitting.jl") include("TR_alg.jl") +include("TRDH_alg.jl") include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl new file mode 100644 index 00000000..126ae2f1 --- /dev/null +++ b/src/TRDH_alg.jl @@ -0,0 +1,363 @@ +export TRDH + +""" + TRDH(nlp, h, χ, options; kwargs...) + TRDH(f, ∇f!, h, options, x0) + +A trust-region method with diagonal Hessian approximation for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is +lower semi-continuous and proper. + +About each iterate xₖ, a step sₖ is computed as an approximate solution of + + min φ(s; xₖ) + ψ(s; xₖ) subject to ‖s‖ ≤ Δₖ + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Dₖ s is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation +and Δₖ > 0 is the trust-region radius. + +### Arguments + +* `nlp::AbstractNLPModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `χ`: a norm used to define the trust region in the form of a regularizer +* `options::ROSolverOptions`: a structure containing algorithmic parameters + +The objective and gradient of `nlp` will be accessed. + +In the second form, instead of `nlp`, the user may pass in + +* `f` a function such that `f(x)` returns the value of f at x +* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` +* `x0::AbstractVector`: an initial guess. + +### Keyword arguments + +* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) +* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`) +* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`). + +### Return values + +* `xk`: the final iterate +* `Fobj_hist`: an array with the history of values of the smooth objective +* `Hobj_hist`: an array with the history of values of the nonsmooth objective +* `Complex_hist`: an array with the history of number of inner iterations. +""" +function TRDH(nlp::AbstractNLPModel{R}, h, χ, options::ROSolverOptions{R}; kwargs...) where {R <: Real} + kwargs_dict = Dict(kwargs...) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + xk, k, outdict = TRDH( + x -> obj(nlp, x), + (g, x) -> grad!(nlp, x, g), + h, + options, + x0; + χ = χ, + l_bound = nlp.meta.lvar, + u_bound = nlp.meta.uvar, + kwargs..., + ) + ξ = outdict[:ξ] + stats = GenericExecutionStats(nlp) + set_status!(stats, outdict[:status]) + set_solution!(stats, xk) + set_objective!(stats, outdict[:fk] + outdict[:hk]) + set_residuals!(stats, zero(eltype(xk)), ξ ≥ 0 ? sqrt(ξ) : ξ) + set_iter!(stats, k) + set_time!(stats, outdict[:elapsed_time]) + set_solver_specific!(stats, :Fhist, outdict[:Fhist]) + set_solver_specific!(stats, :Hhist, outdict[:Hhist]) + set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) + set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) + return stats +end + +# update l_bound_k and u_bound_k +function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ) + if is_subsolver + @. l_bound_k = max(xk - Δ, l_bound) + @. u_bound_k = min(xk + Δ, u_bound) + else + @. l_bound_k = max(-Δ, l_bound - xk) + @. u_bound_k = min(Δ, u_bound - xk) + end +end + +function TRDH( + f::F, + ∇f!::G, + h::H, + options::ROSolverOptions{R}, + x0::AbstractVector{R}; + χ::X = NormLinf(one(R)), + selected::AbstractVector{<:Integer} = 1:length(x0), + Bk = (one(R) / options.ν) * I, + kwargs..., +) where {R <: Real, F, G, H, X} + start_time = time() + elapsed_time = 0.0 + ϵ = options.ϵa + ϵr = options.ϵr + Δk = options.Δk + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + η1 = options.η1 + η2 = options.η2 + γ = options.γ + α = options.α + β = options.β + spectral = options.spectral + psb = options.psb + hess_init_val = (Bk isa UniformScaling) ? Bk.λ : (one(R) / options.ν) + reduce_TR = options.reduce_TR + + local l_bound, u_bound + has_bnds = false + kw_keys = keys(kwargs) + if :l_bound in kw_keys + l_bound = kwargs[:l_bound] + has_bnds = has_bnds || any(l_bound .!= R(-Inf)) + end + if :u_bound in kw_keys + u_bound = kwargs[:u_bound] + has_bnds = has_bnds || any(u_bound .!= R(Inf)) + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "TRDH: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "TRDH: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + l_bound_k = similar(xk) + u_bound_k = similar(xk) + if h isa ShiftedProximableFunction # case TRDH is used as a subsolver + is_subsolver = true + ψ = shifted(h, xk) + @assert !has_bnds + l_bound = copy(ψ.l) + u_bound = copy(ψ.u) + @. l_bound_k = max(xk - Δk, l_bound) + @. u_bound_k = min(xk + Δk, u_bound) + has_bnds = true + set_bounds!(ψ, l_bound_k, u_bound_k) + else + is_subsolver = false + if has_bnds + @. l_bound_k = max(-Δk, l_bound - xk) + @. u_bound_k = min(Δk, u_bound - xk) + ψ = shifted(h, xk, l_bound_k, u_bound_k, selected) + else + ψ = shifted(h, xk, Δk, χ) + end + end + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + if reduce_TR + @info @sprintf "%6s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TRDH" + else + @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TRDH" + end + #! format: off + end + + local ξ1 + local ξ + k = 0 + + fk = f(xk) + ∇fk = similar(xk) + ∇f!(∇fk, xk) + ∇fk⁻ = copy(∇fk) + Dk = spectral ? SpectralGradient(hess_init_val, length(xk)) : + ((Bk isa UniformScaling) ? DiagonalQN(fill!(similar(xk), hess_init_val), psb) : DiagonalQN(diag(Bk), psb)) + DkNorm = norm(Dk.d, Inf) + νInv = (DkNorm + one(R) / (α * Δk)) + ν = one(R) / νInv + mν∇fk = -ν .* ∇fk + + optimal = false + tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime + + while !(optimal || tired) + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + + # model for prox-gradient step to update Δk if ||s|| is too small and ξ1 + φ1(d) = ∇fk' * d + mk1(d) = φ1(d) + ψ(d) + + if reduce_TR + prox!(s, ψ, mν∇fk, ν) + Complex_hist[k] += 1 + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + if ξ1 ≥ 0 && k == 1 + ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative + end + + if sqrt(ξ1) < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end + end + + Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk + # update radius + if has_bnds + update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ_effective) + set_bounds!(ψ, l_bound_k, u_bound_k) + else + set_radius!(ψ, Δ_effective) + end + + # model with diagonal hessian + φ(d) = ∇fk' * d + (d' * (Dk.d .* d)) / 2 + mk(d) = φ(d) + ψ(d) + + iprox!(s, ψ, ∇fk, Dk) + Complex_hist[k] += 1 + + sNorm = χ(s) + xkn .= xk .+ s + fkn = f(xkn) + hkn = h(xkn[selected]) + hkn == -Inf && error("nonsmooth term is not proper") + + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + + if (ξ ≤ 0 || isnan(ξ)) + error("TRDH: failed to compute a step: ξ = $ξ") + end + + if !reduce_TR + if ξ ≥ 0 && k == 1 + ϵ += ϵr * sqrt(ξ) # make stopping test absolute and relative + end + + if sqrt(ξ) < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end + end + + ρk = Δobj / ξ + + TR_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") + + if (verbose > 0) && (k % ptf == 0) + #! format: off + if reduce_TR + @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt(ξ1) sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat + else + @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat + end + #! format: on + end + + if η2 ≤ ρk < Inf + Δk = max(Δk, γ * sNorm) + !has_bnds && set_radius!(ψ, Δk) + end + + if η1 ≤ ρk < Inf + xk .= xkn + has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) + has_bnds && set_bounds!(ψ, l_bound_k, u_bound_k) + #update functions + fk = fkn + hk = hkn + shift!(ψ, xk) + ∇f!(∇fk, xk) + push!(Dk, s, ∇fk - ∇fk⁻) # update QN operator + DkNorm = norm(Dk.d, Inf) + ∇fk⁻ .= ∇fk + end + + if ρk < η1 || ρk == Inf + Δk = Δk / 2 + has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) + has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk) + end + + νInv = (DkNorm + one(R) / (α * Δk)) + ν = one(R) / νInv + mν∇fk .= -ν .* ∇fk + + tired = k ≥ maxIter || elapsed_time > maxTime + end + + if verbose > 0 + if k == 1 + @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk + elseif optimal + #! format: off + if reduce_TR + @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv + #! format: on + @info "TRDH: terminating with √ξ1 = $(sqrt(ξ1))" + else + @info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt(ξ) "" Δk χ(xk) χ(s) νInv + @info "TRDH: terminating with √ξ = $(sqrt(ξ))" + end + end + end + + !reduce_TR && (ξ1 = ξ) # for output dict + + status = if optimal + :first_order + elseif elapsed_time > maxTime + :max_time + elseif tired + :max_iter + else + :exception + end + outdict = Dict( + :Fhist => Fobj_hist[1:k], + :Hhist => Hobj_hist[1:k], + :Chist => Complex_hist[1:k], + :NonSmooth => h, + :status => status, + :fk => fk, + :hk => hk, + :ξ => ξ1 ≥ 0 ? sqrt(ξ1) : ξ1, + :elapsed_time => elapsed_time, + ) + + return xk, k, outdict +end diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 0a2e3f9a..3841e69c 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -72,7 +72,7 @@ function TR( β = options.β local l_bound, u_bound - if has_bounds(f) + if has_bounds(f) || subsolver == TRDH l_bound = f.meta.lvar u_bound = f.meta.uvar end @@ -101,9 +101,8 @@ function TR( xkn = similar(xk) s = zero(xk) - ψ = - has_bounds(f) ? shifted(h, xk, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk), selected) : - shifted(h, xk, Δk, χ) + ψ = (has_bounds(f) || subsolver == TRDH) ? + shifted(h, xk, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk), selected) : shifted(h, xk, Δk, χ) Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) @@ -170,13 +169,14 @@ function TR( subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ, min(1e-2, sqrt(ξ1)) * ξ1) ∆_effective = min(β * χ(s), Δk) - has_bounds(f) ? + (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : set_radius!(ψ, ∆_effective) - s, iter, _ = with_logger(subsolver_logger) do - subsolver(φ, ∇φ!, ψ, subsolver_options, s) + subsolver_options.Δk = ∆_effective / 10 + s, iter, outdict = with_logger(subsolver_logger) do + subsolver(φ, ∇φ!, ψ, subsolver_options, s; Bk = Bk) end - Complex_hist[k] = iter + Complex_hist[k] = sum(outdict[:Chist]) sNorm = χ(s) xkn .= xk .+ s @@ -203,12 +203,12 @@ function TR( if η2 ≤ ρk < Inf Δk = max(Δk, γ * sNorm) - !has_bounds(f) && set_radius!(ψ, Δk) + !(has_bounds(f) || subsolver == TRDH) && set_radius!(ψ, Δk) end if η1 ≤ ρk < Inf xk .= xkn - has_bounds(f) && set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) + (has_bounds(f) || subsolver == TRDH) && set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) #update functions fk = fkn @@ -227,7 +227,7 @@ function TR( if ρk < η1 || ρk == Inf Δk = Δk / 2 - has_bounds(f) ? set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) : + (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) : set_radius!(ψ, Δk) end tired = k ≥ maxIter || elapsed_time > maxTime diff --git a/src/input_struct.jl b/src/input_struct.jl index 0aa2243c..2ff0c799 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -16,6 +16,9 @@ mutable struct ROSolverOptions{R} γ::R # trust region buffer θ::R # step length factor in relation to Hessian norm β::R # TR size as factor of first PG step + spectral::Bool # for TRDH: use spectral gradient update if true, otherwise DiagonalQN + psb::Bool # for TRDH with DiagonalQN (spectral = false): use PSB update if true, otherwise Andrei update + reduce_TR::Bool function ROSolverOptions{R}(; ϵa::R = √eps(R), @@ -33,6 +36,9 @@ mutable struct ROSolverOptions{R} γ::R = R(3), θ::R = R(1e-3), β::R = 1 / eps(R), + spectral::Bool = false, + psb::Bool = false, + reduce_TR::Bool = true, ) where {R <: Real} @assert ϵa ≥ 0 @assert ϵr ≥ 0 @@ -48,7 +54,7 @@ mutable struct ROSolverOptions{R} @assert γ > 1 @assert θ > 0 @assert β ≥ 1 - return new{R}(ϵa, ϵr, neg_tol, Δk, verbose, maxIter, maxTime, σmin, η1, η2, α, ν, γ, θ, β) + return new{R}(ϵa, ϵr, neg_tol, Δk, verbose, maxIter, maxTime, σmin, η1, η2, α, ν, γ, θ, β, spectral, psb, reduce_TR) end end diff --git a/src/utils.jl b/src/utils.jl index f2913a75..3d84154b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -3,3 +3,20 @@ function LinearAlgebra.opnorm(B; kwargs...) _, s, _ = tsvd(B) return s[1] end + +ShiftedProximalOperators.iprox!( + y::AbstractVector, + ψ::ShiftedProximableFunction, + g::AbstractVector, + D::DiagonalQN, +) = iprox!(y, ψ, g, D.d) + +ShiftedProximalOperators.iprox!( + y::AbstractVector, + ψ::ShiftedProximableFunction, + g::AbstractVector, + D::SpectralGradient, +) = iprox!(y, ψ, g, fill!(similar(g), D.d[1])) + +LinearAlgebra.diag(op::DiagonalQN) = copy(op.d) +LinearAlgebra.diag(op::SpectralGradient{T}) where {T} = zeros(T, op.nrow) .* op.d[1] diff --git a/test/runtests.jl b/test/runtests.jl index 1dc1db2f..c4629acc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimizatio const global compound = 1 const global nz = 10 * compound -const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10) +const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10, spectral = true) const global bpdn, bpdn_nls, sol = bpdn_model(compound) const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 diff --git a/test/test_bounds.jl b/test/test_bounds.jl index 1998bd8a..5546de90 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -1,6 +1,6 @@ for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1")) - for solver_sym ∈ (:TR, :R2) + for solver_sym ∈ (:TR, :R2, :TRDH) solver_sym == :TR && mod_name == "exact" && continue solver_name = string(solver_sym) solver = eval(solver_sym)