diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index 6eff53c1..6e778d90 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -115,7 +115,7 @@ function LMTR( if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "1/ν" "TR" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "ν" "TR" #! format: on end @@ -132,9 +132,9 @@ function LMTR( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (1 + θ) * σmax^2 # ‖J'J‖ = ‖J‖² + ν = θ / σmax^2 # ‖J'J‖ = ‖J‖² - mν∇fk = -∇fk / νInv + mν∇fk = -∇fk * ν optimal = false tired = k ≥ maxIter || elapsed_time > maxTime @@ -174,8 +174,8 @@ function LMTR( # Take first proximal gradient step s1 and see if current xk is nearly stationary. # s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)) - ν = 1 / (νInv + 1 / (Δk * α)) - prox!(s, ψ, mν∇fk, ν) + ν1 = 1 / (1/ν + 1 / (Δk * α)) + prox!(s, ψ, mν∇fk, ν1) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() ξ1 > 0 || error("LMTR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") @@ -197,8 +197,8 @@ function LMTR( set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : set_radius!(ψ, ∆_effective) subsolver_options.Δk = ∆_effective / 10 - subsolver_options.ν = ν - subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () + subsolver_options.ν = ν1 + subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν1, nls.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) end @@ -232,7 +232,7 @@ function LMTR( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm νInv TR_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm ν TR_stat #! format: on end @@ -255,8 +255,8 @@ function LMTR( jtprod_residual!(nls, xk, Fk, ∇fk) σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (1 + θ) * σmax^2 # ‖J'J‖ = ‖J‖² - @. mν∇fk = -∇fk / νInv + ν = θ / σmax^2 # ‖J'J‖ = ‖J‖² + @. mν∇fk = -∇fk * ν end if ρk < η1 || ρk == Inf @@ -273,7 +273,7 @@ function LMTR( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) ν #! format: on @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))" end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 8d8c8980..c44a9041 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -127,7 +127,7 @@ function LM( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ s = zero(xk) @@ -174,12 +174,11 @@ function LM( # take first proximal gradient step s1 and see if current xk is nearly stationary # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). - ν = 1 / νInv ∇fk .*= -ν # reuse gradient storage prox!(s, ψ, ∇fk, ν) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + sqrt_ξ1_νInv = sqrt(ξ1 / ν) if ξ1 ≥ 0 && k == 1 ϵ_increment = ϵr * sqrt_ξ1_νInv @@ -196,7 +195,7 @@ function LM( # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default subsolver_options.ν = ν - subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, nls.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -226,7 +225,7 @@ function LM( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) 1/ν σ_stat #! format: off end @@ -260,7 +259,7 @@ function LM( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ tired = k ≥ maxIter || elapsed_time > maxTime end @@ -269,7 +268,7 @@ function LM( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) 1/ν #! format: on @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end diff --git a/src/R2DH.jl b/src/R2DH.jl index 7f01912d..99eddba2 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -191,7 +191,7 @@ function R2DH( Dkσk = D.d .+ σk DNorm = norm(D.d, Inf) - ν = 1 / ((DNorm + σk) * (1 + θ)) + ν = θ / (DNorm + σk) mν∇fk = -ν * ∇fk sqrt_ξ_νInv = one(R) @@ -274,7 +274,7 @@ function R2DH( end Dkσk .= D.d .+ σk - ν = 1 / ((DNorm + σk) * (1 + θ)) + ν = θ / (DNorm + σk) tired = maxIter > 0 && k ≥ maxIter if !tired diff --git a/src/R2N.jl b/src/R2N.jl index aba356a1..8a2d6c17 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -134,7 +134,7 @@ function R2N( λmax, found_λ = opnorm(Bk) found_λ || error("operator norm computation failed") - νInv = (1 + θ) * (σk + λmax) + ν = θ / (σk + λmax) sqrt_ξ1_νInv = one(R) optimal = false @@ -166,11 +166,11 @@ function R2N( # take first proximal gradient step s1 and see if current xk is nearly stationary # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). - subsolver_options.ν = 1 / νInv + subsolver_options.ν = ν prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + sqrt_ξ1_νInv = sqrt(ξ1 / ν) if ξ1 ≥ 0 && k == 1 ϵ_increment = ϵr * sqrt_ξ1_νInv @@ -188,7 +188,7 @@ function R2N( subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3) verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol subsolver_options.σk = σk - subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, f.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) end @@ -256,7 +256,7 @@ function R2N( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (1 + θ) * (σk + λmax) + ν = θ / (σk + λmax) tired = k ≥ maxIter || elapsed_time > maxTime end diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 57e83155..2eaac3b0 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -200,8 +200,7 @@ function TRDH( ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) DNorm = norm(D.d, Inf) - νInv = DNorm + one(R) / (α * Δk) - ν = one(R) / νInv + ν = (α * Δk)/(DNorm + one(R)) mν∇fk = -ν .* ∇fk sqrt_ξ_νInv = one(R) @@ -318,8 +317,7 @@ function TRDH( has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk) end - νInv = reduce_TR ? (DNorm + one(R) / (α * Δk)) : (DNorm + one(R) / α) - ν = one(R) / νInv + ν = reduce_TR ? (α * Δk)/(DNorm + one(R)) : α / (DNorm + one(R)) mν∇fk .= -ν .* ∇fk tired = k ≥ maxIter || elapsed_time > maxTime diff --git a/src/input_struct.jl b/src/input_struct.jl index 7242b90a..1b8b1581 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -34,7 +34,7 @@ mutable struct ROSolverOptions{R} α::R = 1 / eps(R), ν::R = eps(R)^(1 / 5), γ::R = R(3), - θ::R = eps(R)^(1 / 5), + θ::R = 1/(1+eps(R)^(1 / 5)), β::R = 1 / eps(R), reduce_TR::Bool = true, ) where {R <: Real}