Merge pull request #65 from JuliaNLSolvers/pkm/nonallogdogleg

Allow non-allocating dogleg
JuliaNLSolvers · Jan 21, 2024 · 00f3b00 · 00f3b00 · pkofod · Jan 21, 2024
2 parents 2b12412 + 5c5f598
commit 00f3b00
Show file tree

Hide file tree

Showing 14 changed files with 136 additions and 73 deletions.
diff --git a/src/NLSolvers.jl b/src/NLSolvers.jl
@@ -185,17 +185,25 @@ include("lsqsolve/problem_types.jl")
 include("lsqsolve/optimization.jl")
 export LeastSquaresProblem, LeastSquaresOptions
 
-function negate(problem::AbstractProblem, A)
-
-end
 function negate(::InPlace, A::AbstractArray)
     A .= .-A
     return A
 end
 function negate(::OutOfPlace, A::AbstractArray)
     -A
 end
-
+function _scale(::InPlace, B, A, m)
+    B .= A .* m
+end
+function _scale(::OutOfPlace, B, A, m)
+    A .* m
+end
+function _copyto(::InPlace, z, x)
+    copyto!(z, x)
+end
+function _copyto(::OutOfPlace, z, x)
+    copy(x)
+end
 mstyle(problem::AbstractProblem) = problem.mstyle
 
 """

diff --git a/src/globalization/linesearches/backtracking.jl b/src/globalization/linesearches/backtracking.jl
@@ -158,7 +158,7 @@ function find_steplength(mstyle, ls::Backtracking, φ::T, λ) where {T}
         Tf(ls.ratio), Tf(ls.decrease), ls.maxiter, ls.verbose
 
     #== factor in Armijo condition ==#
-    t = -decrease * dφ0
+    t = decrease * dφ0
 
     iter, α, β = 0, λ, λ # iteration variables
     f_α = φ(α) # initial function value

diff --git a/src/globalization/trs_solvers/solvers/Dogleg.jl b/src/globalization/trs_solvers/solvers/Dogleg.jl
@@ -20,7 +20,7 @@ struct Dogleg{T} <: TRSPSolver
 end
 Dogleg() = Dogleg(nothing)
 
-function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
+function (dogleg::Dogleg)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50)
     T = eltype(p)
     n = length(∇f)
 
@@ -32,7 +32,8 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
     norm_d_cauchy = norm(d_cauchy)
     if norm_d_cauchy ≥ Δ
         shrink = Δ / norm_d_cauchy # inv(Δ/norm_d_cauchy) puts it on the border
-        p .= d_cauchy .* shrink
+
+        p = _scale(mstyle, p, d_cauchy, shrink)
         interior = false
     else
         # Else, calculate (Quasi-)Newton step. If this is interior, then take the
@@ -42,7 +43,6 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
         p = find_direction!(p, H, nothing, ∇f, scheme)
         norm_p = norm(p)
         if norm_p ≤ Δ # fixme really need to add the 20% slack here (see TR book and NTR)
-            p .= p
             if norm_p < Δ
                 interior = true
             else
@@ -75,7 +75,8 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
             else
                 t = T(0)
             end
-            p .= d_cauchy .+ t .* p
+
+            p, _ = move(mstyle, p, d_cauchy, p, p, t)
             interior = false
         end
     end

diff --git a/src/globalization/trs_solvers/solvers/NTR.jl b/src/globalization/trs_solvers/solvers/NTR.jl
@@ -51,6 +51,7 @@ function (ms::NTR)(
     Δ::T,
     s,
     scheme,
+    mstyle,
     λ0 = 0;
     abstol = 1e-10,
     maxiter = 50,

diff --git a/src/globalization/trs_solvers/solvers/NWI.jl b/src/globalization/trs_solvers/solvers/NWI.jl
@@ -148,7 +148,7 @@ Returns:
     solved - Whether or not a solution was reached (as opposed to
       terminating early due to maxiter)
 """
-function (ms::NWI)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
+function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50)
     T = eltype(p)
     n = length(∇f)
     H = H isa UniformScaling ? Diagonal(copy(∇f) .* 0 .+ 1) : H

diff --git a/src/globalization/trs_solvers/solvers/TCG.jl b/src/globalization/trs_solvers/solvers/TCG.jl
@@ -22,6 +22,7 @@ function (ms::TCG)(
     Δ::T,
     s,
     scheme,
+    mstyle,
     λ0 = 0;
     abstol = 1e-10,
     maxiter = min(5, length(s)),

diff --git a/src/nlsolve/trustregions/newton.jl b/src/nlsolve/trustregions/newton.jl
@@ -12,10 +12,6 @@ has_param(::NormedResiduals) = false
 function value(nr::NormedResiduals, x)
     Fx = nr.F.F(nr.Fx, x)
 
-    # this is just to grab them outside, but this hsould come from the convergence info perhaps?
-    nr.x .= x
-    nr.Fx .= Fx
-
     f = (norm(Fx)^2) / 2
     return f
 end
@@ -24,32 +20,26 @@ function upto_gradient(nr::NormedResiduals, Fx, x)
     Fx, Jx = nr.F.FJ(nr.Fx, nr.Fx * x', x)
 
     # this is just to grab them outside, but this hsould come from the convergence info perhaps?
-    nr.x .= x
-    nr.Fx .= Fx
-
     f = (norm(Fx)^2) / 2
-    Fx .= Jx' * Fx
-    return f, Fx
+    JxtFx = Jx' * Fx
+    return f, JxtFx
 end
 function upto_hessian(nr::NormedResiduals, Fx, Jx, x)  #Fx is the gradient and Jx is the Hessian
     Fx, Jx = nr.F.FJ(nr.Fx, Jx, x)
 
     # this is just to grab them outside, but this hsould come from the convergence info perhaps?
-    nr.x .= x
-    nr.Fx .= Fx
-
     f = (norm(Fx)^2) / 2
     # this is the gradient
-    Fx .= Jx' * Fx
+    JxtFx = Jx' * Fx
     # As you may notice, this can be expensive... Because the gradient
     # is going to be very simple. May want to create a
     # special type or way to hook into trust regions here. We can exploit
     # that we only need the cauchy and the newton steps, not any shifted
     # systems. There is no need to get the full hessian. because these two
     # steps are don't need these multiplies
     # This is the Hessian
-    Jx .= Jx' * Jx
-    return f, Fx, Jx
+    Jx2 = Jx' * Jx
+    return f, JxtFx, Jx2
 end
 
 function solve(
@@ -58,7 +48,7 @@ function solve(
     approach::TrustRegion{<:Union{SR1,DBFGS,BFGS,Newton},<:Any,<:Any},
     options::NEqOptions,
 )
-    if !(mstyle(prob) === InPlace())
+  if !(mstyle(prob) === InPlace()) && !(approach.spsolve isa Dogleg)
         throw(
             ErrorException(
                 "solve() not defined for OutOfPlace() with Trustregion for NEqProblem",
@@ -75,17 +65,16 @@ function solve(
     normed_residual = NormedResiduals(x_outer, Fx_outer, F)
     ρ2F0 = sqrt(value(normed_residual, x_outer) * 2)
     ρF0 = norm(normed_residual.Fx, Inf)
-    td = OptimizationProblem(normed_residual)
+    td = OptimizationProblem(normed_residual, inplace = mstyle(prob) == InPlace())
+    options.maxiter
     res = solve(td, x, approach, OptimizationOptions(maxiter = options.maxiter))
-    # normed_residual to get Fx
-    # f0*2 is wrong because it's 2norm
     newinfo = (
         solution = solution(res),
-        best_residual = Fx_outer,
+        best_residual = value(F, Fx_outer, solution(res)),
         ρF0 = ρF0,
         ρ2F0 = ρ2F0,
         time = res.info.time,
         iter = res.info.iter,
     )
-    return ConvergenceInfo(approach, newinfo, options)
+    return  ConvergenceInfo(approach, newinfo, options)
 end
diff --git a/src/objectives.jl b/src/objectives.jl
@@ -19,8 +19,16 @@ struct ScalarObjective{Tf,Tg,Tfg,Tfgh,Th,Thv,Tbf,P}
     batched_f::Tbf
     param::P
 end
-ScalarObjective(; f = nothing, g = nothing, fg = nothing, fgh = nothing, h = nothing) =
-    ScalarObjective(f, g, fg, fgh, h, nothing, nothing, nothing)
+ScalarObjective(;
+     f = nothing,
+     g = nothing,
+     fg = nothing,
+     fgh = nothing,
+     h = nothing,
+     hv = nothing,
+     batched_f = nothing,
+     param = nothing,
+ ) = ScalarObjective(f, g, fg, fgh, h, hv, batched_f, param)
 has_param(so::ScalarObjective) = so.param === nothing ? false : true
 function value(so::ScalarObjective, x)
     if has_param(so)
@@ -177,21 +185,6 @@ function _value(mo, R::VectorObjective, Fx, x)
     (ϕ = (norm(Fx)^2) / 2, Fx = Fx)
 end
 
-struct LsqWrapper{Tobj,TF,TJ} <: ObjWrapper
-    R::Tobj
-    F::TF
-    J::TJ
-end
-function (lw::LsqWrapper)(x)
-    F = lw.R(lw.F, x)
-    sum(abs2, F) / 2
-end
-function (lw::LsqWrapper)(∇f, x)
-    _F, _J = lw.R(lw.F, lw.J, x)
-    copyto!(∇f, sum(_J; dims = 1))
-    sum(abs2, _F), ∇f
-end
-
 struct LeastSquaresObjective{TFx,TJx,Tf,Tfj,Td}
     Fx::TFx
     Jx::TJx

diff --git a/src/optimize/trustregions/optimize/inplace_loop.jl b/src/optimize/trustregions/optimize/inplace_loop.jl
@@ -16,7 +16,7 @@ function solve(
     objvars;
     initial_Δ = 20.0,
 )
-    if !(mstyle(problem) === InPlace())
+    if !(mstyle(problem) === InPlace()) && !(approach.spsolve == Dogleg())
         throw(
             ErrorException("solve() not defined for OutOfPlace() with TrustRegion solvers"),
         )
@@ -49,15 +49,15 @@ function solve(
     qnvars = QNVars(objvars.z, objvars.z)
     p = copy(objvars.x)
 
-    objvars, Δkp1, reject =
+    objvars, Δkp1, reject, qnvars =
         iterate!(p, objvars, Δk, approach, problem, options, qnvars, false)
 
     iter = 1
     # Check for convergence
     is_converged = converged(approach, objvars, ∇f0, options, reject, Δkp1)
     while iter <= options.maxiter && !any(is_converged)
         iter += 1
-        objvars, Δkp1, reject =
+        objvars, Δkp1, reject, qnvars =
             iterate!(p, objvars, Δkp1, approach, problem, options, qnvars, false)
 
         # Check for convergence
@@ -113,9 +113,11 @@ function iterate!(
     scheme, subproblemsolver = modelscheme(approach), algorithm(approach)
     y, d, s = qnvars.y, qnvars.d, qnvars.s
     fx = fz
-    copyto!(x, z)
-    copyto!(∇fx, ∇fz)
-    spr = subproblemsolver(∇fx, B, Δk, p, scheme; abstol = 1e-10)
+
+    x = _copyto(mstyle(problem), x, z)
+    ∇fx = _copyto(mstyle(problem), ∇fx, ∇fz)
+
+    spr = subproblemsolver(∇fx, B, Δk, p, scheme, problem.mstyle; abstol = 1e-10)
     Δm = -spr.mz
 
     # Grab the model value, m. If m is zero, the solution, z, does not improve
@@ -124,14 +126,18 @@ function iterate!(
     # tive value, we may conclude that "something" is wrong. We might be at a
     # ridge (positive-indefinite case) for example, or the scaling of the model
     # is such that we cannot satisfy ||∇f|| < tol.
-    if abs(spr.mz) < eps(T)
+    if abs(spr.mz) < eps(spr.mz)
         # set flag to check for problems
     end
 
-    z = retract(problem, z, x, p)
+    z = retract(problem, z, x, spr.p)
     # Update before acceptance, to keep adding information about the hessian
     # even when the step is not "good" enough.
 
+    y, d, s = qnvars.y, qnvars.d, qnvars.s
+    fx = fz
+    # Should build a good code for picking update model.
+
     fz, ∇fz, B, s, y = update_obj!(problem, spr.p, y, ∇fx, z, ∇fz, B, scheme, scale)
 
     # Δf is often called ared or Ared for actual reduction. I prefer "change in"
@@ -144,13 +150,19 @@ function iterate!(
     Δkp1, reject_step = update_trust_region(spr, R, p)
 
     if reject_step
-        z .= x
+        z = _copyto(mstyle(problem), z, x)
+        ∇fz = _copyto(mstyle(problem), ∇fz, ∇fx)
         fz = fx
-        ∇fz .= ∇fx
+        # This is correct because 
+        s = _scale(mstyle(problem), s, s, 0) # z - x
+        y = _scale(mstyle(problem), y, y, 0) # ∇fz - ∇fx
+        # and will cause quasinewton updates to not update
+        # this seems incorrect as it's already updated, should hold off here
     end
     return (x = x, fx = fx, ∇fx = ∇fx, z = z, fz = fz, ∇fz = ∇fz, B = B, Pg = nothing),
     Δkp1,
-    reject_step
+    reject_step,
+    QNVars(d, s, y)
 end
 
 function update_trust_region(spr, R, p)

diff --git a/src/quasinewton/approximations/newton.jl b/src/quasinewton/approximations/newton.jl
@@ -12,7 +12,7 @@ function default_newton_linsolve(B, g)
     B \ g
 end
 function default_newton_linsolve(d, B, g)
-    d .= (B \ g)
+    d = (B \ g)
 end
 
 function init_f∇fB(prob, scheme::Newton, ∇fz, B, x)

diff --git a/test/globalization/truncatedconjugategradient.jl b/test/globalization/truncatedconjugategradient.jl
@@ -5,29 +5,29 @@ using Test, NLSolvers, LinearAlgebra
     H = [0.3 0.0; 0.0 0.9]
     g = [0.2, 0.4]
 
-    m(g, H, 0.7, rand(2), 1)
+    m(g, H, 0.7, rand(2), 1, NLSolvers.InPlace())
 
     # Since -H is negative definite the solution is guaranteed
     # to be at the boundary unless g = 0
     for Δ in range(0, 100; step = 0.1)
-        @test norm(m(g, -H, Δ, rand(2), 1).p, 2) ≈ Δ
+        @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) ≈ Δ
     end
 
     # Small gradient entries
     g .= [1e-12, 1e-9]
     for Δ in range(0, 100; step = 0.1)
-        @test norm(m(g, -H, Δ, rand(2), 1).p, 2) ≈ Δ
+        @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) ≈ Δ
     end
 
     # Mixed gradient entries
     g .= [1e12, 1e-9]
     for Δ in range(0, 100; step = 0.1)
-        @test norm(m(g, -H, Δ, rand(2), 1).p, 2) ≈ Δ
+        @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) ≈ Δ
     end
 
     # Zero case
     g = [0.0, 0.0]
     for Δ in range(0, 100; step = 0.1)
-        @test norm(m(g, -H, Δ, rand(2), 1).p, 2) == 0
+        @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) == 0
     end
 end