diff --git a/src/NLSolversBase.jl b/src/NLSolversBase.jl index efbf625..08a3ea5 100644 --- a/src/NLSolversBase.jl +++ b/src/NLSolversBase.jl @@ -5,8 +5,11 @@ module NLSolversBase import Base: gradient export AbstractObjective, NonDifferentiable, + NonDifferentiableCache, OnceDifferentiable, + OnceDifferentiableCache, TwiceDifferentiable, + TwiceDifferentiableCache, iscomplex, real_to_complex, complex_to_real, diff --git a/src/interface.jl b/src/interface.jl index 4ff2bc4..18322ac 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -1,62 +1,56 @@ -function _unchecked_value!(obj, x) - obj.f_calls .+= 1 - copy!(obj.last_x_f, x) - obj.f_x = obj.f(real_to_complex(obj, x)) -end -function value(obj, x) - if x != obj.last_x_f - obj.f_calls .+= 1 - return obj.f(real_to_complex(obj,x)) +function value(cache, obj::AbstractObjective, x) + if x != cache.last_x_f + cache.f_calls .+= 1 + return obj.f(real_to_complex(cache, x)) end - obj.f_x + cache.f_x end -function value!(obj, x) - if x != obj.last_x_f - _unchecked_value!(obj, x) +function value!(cache, obj::AbstractObjective, x) + if x != cache.last_x_f + cache.f_calls .+= 1 + copy!(cache.last_x_f, x) + cache.f_x = obj.f(real_to_complex(cache, x)) end - obj.f_x + cache.f_x end - -function _unchecked_gradient!(obj, x) - obj.g_calls .+= 1 - copy!(obj.last_x_g, x) - obj.g!(real_to_complex(obj, obj.g), real_to_complex(obj, x)) -end -function gradient!(obj::AbstractObjective, x) - if x != obj.last_x_g - _unchecked_gradient!(obj, x) +function gradient!(cache, obj::AbstractObjective, x) + if x != cache.last_x_g + cache.g_calls .+= 1 + copy!(cache.last_x_g, x) + obj.g!(real_to_complex(cache, cache.g), real_to_complex(cache, x)) end end -function value_gradient!(obj::AbstractObjective, x) - if x != obj.last_x_f && x != obj.last_x_g - obj.f_calls .+= 1 - obj.g_calls .+= 1 - copy!(obj.last_x_f, x) - copy!(obj.last_x_g, x) - obj.f_x = obj.fg!(real_to_complex(obj, obj.g), real_to_complex(obj, x)) - elseif x != obj.last_x_f - _unchecked_value!(obj, x) - elseif x != obj.last_x_g - _unchecked_gradient!(obj, x) +function value_gradient!(cache, obj::AbstractObjective, x) + if x != cache.last_x_f && x != cache.last_x_g + cache.f_calls .+= 1 + cache.g_calls .+= 1 + copy!(cache.last_x_f, x) + copy!(cache.last_x_g, x) + cache.f_x = obj.fg!(real_to_complex(cache, cache.g), real_to_complex(cache, x)) + elseif x != cache.last_x_f + cache.f_calls .+= 1 + copy!(cache.last_x_f, x) + cache.f_x = obj.f(real_to_complex(cache, x)) + elseif x != cache.last_x_g + cache.g_calls .+= 1 + copy!(cache.last_x_g, x) + obj.g!(real_to_complex(cache, cache.g), real_to_complex(cache, x)) end - obj.f_x + cache.f_x end -function _unchecked_hessian!(obj::AbstractObjective, x) - obj.h_calls .+= 1 - copy!(obj.last_x_h, x) - obj.h!(obj.H, x) -end -function hessian!(obj::AbstractObjective, x) - if x != obj.last_x_h - _unchecked_hessian!(obj, x) +function hessian!(cache, obj::AbstractObjective, x) + if x != cache.last_x_h + cache.h_calls .+= 1 + copy!(cache.last_x_h, x) + obj.h!(cache.H, x) end end -# Getters are without ! and accept only an objective and index or just an objective -value(obj::AbstractObjective) = obj.f_x -gradient(obj::AbstractObjective) = obj.g -gradient(obj::AbstractObjective, i::Integer) = obj.g[i] -hessian(obj::AbstractObjective) = obj.H +# Getters are without ! and accept only an objective cache and index or just an objective cache +value(cache::AbstractObjectiveCache) = cache.f_x +gradient(cache::AbstractObjectiveCache) = cache.g +gradient(cache::AbstractObjectiveCache, i::Integer) = cache.g[i] +hessian(cache::AbstractObjectiveCache) = cache.H diff --git a/src/objective_types.jl b/src/objective_types.jl index 4f2884c..d967e97 100644 --- a/src/objective_types.jl +++ b/src/objective_types.jl @@ -1,33 +1,46 @@ abstract type AbstractObjective end -real_to_complex(d::AbstractObjective, x) = iscomplex(d) ? real_to_complex(x) : x -complex_to_real(d::AbstractObjective, x) = iscomplex(d) ? complex_to_real(x) : x +abstract type AbstractObjectiveCache end +real_to_complex(c::AbstractObjectiveCache, x) = iscomplex(c) ? real_to_complex(x) : x +complex_to_real(c::AbstractObjectiveCache, x) = iscomplex(c) ? complex_to_real(x) : x # Used for objectives and solvers where no gradient is available/exists -mutable struct NonDifferentiable{T,A<:AbstractArray{T},Tcplx} <: AbstractObjective where {T<:Real, - Tcplx<:Union{Val{true},Val{false}} #if true, must convert back on every f call - } +struct NonDifferentiable <: AbstractObjective f +end + +mutable struct NonDifferentiableCache{T,A<:AbstractArray{T},Tcplx} <: AbstractObjectiveCache where {T<:Real, + Tcplx<:Union{Val{true},Val{false}} # true is complex x; must convert back on every f call + } f_x::T last_x_f::A f_calls::Vector{Int} end -iscomplex(obj::NonDifferentiable{T,A,Val{true}}) where {T,A} = true -iscomplex(obj::NonDifferentiable{T,A,Val{false}}) where {T,A} = false -NonDifferentiable(f,f_x::T, last_x_f::AbstractArray{T}, f_calls::Vector{Int}) where {T} = NonDifferentiable{T,typeof(last_x_f),Val{false}}(f,f_x,last_x_f,f_calls) #compatibility with old constructor - -function NonDifferentiable(f, x_seed::AbstractArray) - iscomplex = eltype(x_seed) <: Complex - if iscomplex +iscomplex(obj::NonDifferentiableCache{T,A,Val{true}}) where {T,A} = true +iscomplex(obj::NonDifferentiableCache{T,A,Val{false}}) where {T,A} = false +function NonDifferentiableCache(f, x_seed::AbstractArray) + x_complex = eltype(x_seed) <: Complex + if x_complex x_seed = complex_to_real(x_seed) end - NonDifferentiable{eltype(x_seed),typeof(x_seed),Val{iscomplex}}(f, f(x_seed), copy(x_seed), [1]) + NonDifferentiableCache{eltype(x_seed),typeof(x_seed),Val{x_complex}}(f(x_seed), copy(x_seed), [1]) end # Used for objectives and solvers where the gradient is available/exists -mutable struct OnceDifferentiable{T, Tgrad, A<:AbstractArray{T}, Tcplx} <: AbstractObjective where {T<:Real, Tgrad, Tcplx<:Union{Val{true},Val{false}}} +struct OnceDifferentiable <: AbstractObjective f g! fg! +end +# Automatically create the fg! helper function if only f and g! is provided +function OnceDifferentiable(f, g!) + function fg!(storage, x) + g!(storage, x) + return f(x) + end + return OnceDifferentiable(f, g!, fg!) +end + +mutable struct OnceDifferentiableCache{T, Tgrad, A<:AbstractArray{T}, Tcplx} <: AbstractObjectiveCache where {T<:Real, Tgrad, Tcplx<:Union{Val{true},Val{false}}} f_x::T g::Tgrad last_x_f::A @@ -35,40 +48,47 @@ mutable struct OnceDifferentiable{T, Tgrad, A<:AbstractArray{T}, Tcplx} <: Abstr f_calls::Vector{Int} g_calls::Vector{Int} end -iscomplex(obj::OnceDifferentiable{T,Tgrad,A,Val{true}}) where {T,Tgrad,A} = true -iscomplex(obj::OnceDifferentiable{T,Tgrad,A,Val{false}}) where {T,Tgrad,A} = false -OnceDifferentiable(f,g!,fg!,f_x::T, g::Tgrad, last_x_f::A, last_x_g::A, f_calls::Vector{Int}, g_calls::Vector{Int}) where {T, Tgrad, A<:AbstractArray{T}} = OnceDifferentiable{T,Tgrad,A,Val{false}}(f,g!,fg!,f_x, g, last_x_f, last_x_g, f_calls, g_calls) #compatibility with old constructor - -# The user friendly/short form OnceDifferentiable constructor -function OnceDifferentiable(f, g!, fg!, x_seed::AbstractArray) - iscomplex = eltype(x_seed) <: Complex +iscomplex(obj::OnceDifferentiableCache{T,Tgrad,A,Val{true}}) where {T,Tgrad,A} = true +iscomplex(obj::OnceDifferentiableCache{T,Tgrad,A,Val{false}}) where {T,Tgrad,A} = false +function OnceDifferentiableCache(f, g!, x_seed::AbstractArray) + function fg!(storage, x) + g!(storage, x) + return f(x) + end + OnceDifferentiableCache(f, g!, fg!, x_seed) +end +function OnceDifferentiableCache(f, g!, fg!, x_seed::AbstractArray) + x_complex = eltype(x_seed) <: Complex g = similar(x_seed) f_val = fg!(g, x_seed) - if iscomplex + if x_complex x_seed = complex_to_real(x_seed) g = complex_to_real(g) end - OnceDifferentiable{eltype(x_seed),typeof(g),typeof(x_seed),Val{iscomplex}}(f, g!, fg!, f_val, g, copy(x_seed), copy(x_seed), [1], [1]) -end -# Automatically create the fg! helper function if only f and g! is provided -function OnceDifferentiable(f, g!, x_seed::AbstractArray) - function fg!(storage, x) - g!(storage, x) - return f(x) - end - return OnceDifferentiable(f, g!, fg!, x_seed) + OnceDifferentiableCache{eltype(x_seed),typeof(g),typeof(x_seed),Val{x_complex}}(f_val, g, copy(x_seed), copy(x_seed), [1], [1]) end # Used for objectives and solvers where the gradient and Hessian is available/exists -mutable struct TwiceDifferentiable{T<:Real,Tgrad,A<:AbstractArray{T}} <: AbstractObjective +struct TwiceDifferentiable <: AbstractObjective f g! fg! h! +end +# Automatically create the fg! helper function if only f, g! and h! is provided +function TwiceDifferentiable(f, g!, h!) + function fg!(storage, x) + g!(storage, x) + return f(x) + end + return TwiceDifferentiable(f, g!, fg!, h!) +end + +mutable struct TwiceDifferentiableCache{T<:Real,Tgrad,Thess,A<:AbstractArray{T}} <: AbstractObjectiveCache f_x::T g::Tgrad - H::Matrix{T} + H::Thess last_x_f::A last_x_g::A last_x_h::A @@ -76,34 +96,23 @@ mutable struct TwiceDifferentiable{T<:Real,Tgrad,A<:AbstractArray{T}} <: Abstrac g_calls::Vector{Int} h_calls::Vector{Int} end -iscomplex(obj::TwiceDifferentiable) = false -# The user friendly/short form TwiceDifferentiable constructor -function TwiceDifferentiable(td::TwiceDifferentiable, x::AbstractArray) - value_gradient!(td, x) - hessian!(td, x) - td +iscomplex(obj::TwiceDifferentiableCache) = false +# The user friendly/short form TwiceDifferentiableCache constructor +function TwiceDifferentiableCache(f, g!, h!, x_seed::AbstractArray) + function fg!(storage, x) + g!(storage, x) + return f(x) + end + TwiceDifferentiableCache(f, g!, fg!, h!, x_seed) end - -function TwiceDifferentiable(f, g!, fg!, h!, x_seed::AbstractArray{T}) where T +function TwiceDifferentiableCache(f, g!, fg!, h!, x_seed::AbstractArray{T}) where T n_x = length(x_seed) + g = similar(x_seed) H = Array{T}(n_x, n_x) f_val = fg!(g, x_seed) h!(H, x_seed) - TwiceDifferentiable(f, g!, fg!, h!, f_val, - g, H, copy(x_seed), - copy(x_seed), copy(x_seed), [1], [1], [1]) -end -# Automatically create the fg! helper function if only f, g! and h! is provided -function TwiceDifferentiable(f, - g!, - h!, - x_seed::AbstractArray{T}) where T - function fg!(storage, x) - g!(storage, x) - return f(x) - end - return TwiceDifferentiable(f, g!, fg!, h!, x_seed) -end + TwiceDifferentiableCache(f_val, g, H, copy(x_seed), copy(x_seed), copy(x_seed), [1], [1], [1]) +end \ No newline at end of file diff --git a/test/interface.jl b/test/interface.jl index cc12207..3127df4 100644 --- a/test/interface.jl +++ b/test/interface.jl @@ -1,5 +1,4 @@ @testset "interface" begin - # Test example function exponential(x::Vector) return exp((2.0 - x[1])^2) + exp((3.0 - x[2])^2) @@ -27,75 +26,79 @@ g_x_alt = [-5.43656365691809, -218.39260013257694] h_x_alt = [16.30969097075427 0.; 0. 982.7667005965963] - nd = NonDifferentiable(exponential, x_seed) - od = OnceDifferentiable(exponential, exponential_gradient!, x_seed) - td = TwiceDifferentiable(exponential, exponential_gradient!, exponential_hessian!, x_seed) + nd = NonDifferentiable(exponential) + ndc = NonDifferentiableCache(exponential, x_seed) + od = OnceDifferentiable(exponential, exponential_gradient!) + odc = OnceDifferentiableCache(exponential, exponential_gradient!, x_seed) + td = TwiceDifferentiable(exponential, exponential_gradient!, exponential_hessian!) + tdc = TwiceDifferentiableCache(exponential, exponential_gradient!, exponential_hessian!, x_seed) - @test value(nd) == value(od) == value(td) == f_x_seed - @test value(nd, x_seed) == value(od, x_seed) == value(td, x_seed) + @test value(ndc) == value(odc) == value(tdc) == f_x_seed + @test value(ndc, nd, x_seed) == value(odc, od, x_seed) == value(tdc, td, x_seed) # change last_x_f manually to test branch - nd.last_x_f .*= 0 - od.last_x_f .*= 0 - td.last_x_f .*= 0 - @test value(nd, x_seed) == value(od, x_seed) == value(td, x_seed) - @test gradient(od) == gradient(td) == g_x_seed - @test hessian(td) == h_x_seed - @test nd.f_calls == od.f_calls == td.f_calls == [1] - @test od.g_calls == td.g_calls == [1] - @test td.h_calls == [1] + ndc.last_x_f .*= 0 + odc.last_x_f .*= 0 + tdc.last_x_f .*= 0 + @test value(ndc, nd, x_seed) == value(odc, od, x_seed) == value(tdc, td, x_seed) + @test gradient(odc) == gradient(tdc) == g_x_seed + @test hessian(tdc) == h_x_seed + @test ndc.f_calls == odc.f_calls == tdc.f_calls == [1] + @test odc.g_calls == tdc.g_calls == [1] + @test tdc.h_calls == [1] - @test_throws ErrorException gradient!(nd, x_alt) - gradient!(od, x_alt) - gradient!(td, x_alt) + # can't update gradient for NonDifferentiable + @test_throws ErrorException gradient!(ndc, nd, x_alt) + gradient!(odc, od, x_alt) + gradient!(tdc, td, x_alt) - @test value(nd) == value(od) == value(td) == f_x_seed - @test gradient(td) == g_x_alt - @test gradient(td) == [gradient(td, i) for i = 1:length(x_seed)] - @test hessian(td) == h_x_seed - @test nd.f_calls == od.f_calls == td.f_calls == [1] - @test od.g_calls == td.g_calls == [2] - @test td.h_calls == [1] + @test value(ndc) == value(odc) == value(tdc) == f_x_seed + @test gradient(tdc) == g_x_alt + @test gradient(tdc) == [gradient(tdc, i) for i = 1:length(x_seed)] + @test hessian(tdc) == h_x_seed + @test ndc.f_calls == odc.f_calls == tdc.f_calls == [1] + @test odc.g_calls == tdc.g_calls == [2] + @test tdc.h_calls == [1] - @test_throws ErrorException hessian!(nd, x_alt) - @test_throws ErrorException hessian!(od, x_alt) - hessian!(td, x_alt) + @test_throws ErrorException hessian!(ndc, nd, x_alt) + @test_throws ErrorException hessian!(odc, od, x_alt) + hessian!(tdc, td, x_alt) - @test value(nd) == value(od) == value(td) == f_x_seed - @test gradient(td) == g_x_alt - @test hessian(td) == h_x_alt - @test nd.f_calls == od.f_calls == td.f_calls == [1] - @test od.g_calls == td.g_calls == [2] - @test td.h_calls == [2] + @test value(ndc) == value(odc) == value(tdc) == f_x_seed + @test gradient(tdc) == g_x_alt + @test hessian(tdc) == h_x_alt + @test ndc.f_calls == odc.f_calls == tdc.f_calls == [1] + @test odc.g_calls == tdc.g_calls == [2] + @test tdc.h_calls == [2] - value!(nd, x_alt) - value!(od, x_alt) - value!(td, x_alt) - @test value(nd) == value(od) == value(td) == f_x_alt - @test gradient(td) == g_x_alt - @test hessian(td) == h_x_alt - @test nd.f_calls == od.f_calls == td.f_calls == [2] - @test od.g_calls == td.g_calls == [2] - @test td.h_calls == [2] + value!(ndc, nd, x_alt) + value!(odc, od, x_alt) + value!(tdc, td, x_alt) + @test value(ndc) == value(odc) == value(tdc) == f_x_alt + @test gradient(tdc) == g_x_alt + @test hessian(tdc) == h_x_alt + @test ndc.f_calls == odc.f_calls == tdc.f_calls == [2] + @test odc.g_calls == tdc.g_calls == [2] + @test tdc.h_calls == [2] - @test_throws ErrorException value_gradient!(nd, x_seed) - value_gradient!(od, x_seed) - value_gradient!(td, x_seed) - @test value(od) == value(td) == f_x_seed + @test_throws ErrorException value_gradient!(ndc, nd, x_seed) + value_gradient!(odc, od, x_seed) + value_gradient!(tdc, td, x_seed) + @test value(odc) == value(tdc) == f_x_seed # change last_x_f manually to test branch - od.last_x_f .*= 0 - td.last_x_f .*= 0 - value_gradient!(od, x_seed) - value_gradient!(td, x_seed) - @test value(od) == value(td) == f_x_seed + odc.last_x_f .*= 0 + tdc.last_x_f .*= 0 + value_gradient!(odc, od, x_seed) + value_gradient!(tdc, td, x_seed) + @test value(odc) == value(tdc) == f_x_seed # change last_x_g manually to test branch - od.last_x_g .*= 0 - td.last_x_g .*= 0 - value_gradient!(od, x_seed) - value_gradient!(td, x_seed) - @test value(od) == value(td) == f_x_seed - @test gradient(td) == g_x_seed - @test hessian(td) == h_x_alt - @test od.f_calls == td.f_calls == [3] - @test od.g_calls == td.g_calls == [3] - @test td.h_calls == [2] + odc.last_x_g .*= 0 + tdc.last_x_g .*= 0 + value_gradient!(odc, od, x_seed) + value_gradient!(tdc, td, x_seed) + @test value(odc) == value(tdc) == f_x_seed + @test gradient(tdc) == g_x_seed + @test hessian(tdc) == h_x_alt + @test odc.f_calls == tdc.f_calls == [3] + @test odc.g_calls == tdc.g_calls == [3] + @test tdc.h_calls == [2] end diff --git a/test/objective_types.jl b/test/objective_types.jl index 9c175b7..49d860d 100644 --- a/test/objective_types.jl +++ b/test/objective_types.jl @@ -20,32 +20,28 @@ x_seed = [0.0, 0.0] f_x_seed = 8157.682077608529 - nd = NonDifferentiable(exponential, x_seed) + nd = NonDifferentiable(exponential) + ndc = NonDifferentiableCache(exponential, x_seed) @test nd.f == exponential - @test value(nd) == f_x_seed - @test nd.last_x_f == [0.0, 0.0] - @test nd.f_calls == [1] - od = OnceDifferentiable(exponential, exponential_gradient!, x_seed) + @test value(ndc) == f_x_seed + @test ndc.last_x_f == [0.0, 0.0] + @test ndc.f_calls == [1] + od = OnceDifferentiable(exponential, exponential_gradient!) + odc = OnceDifferentiableCache(exponential, exponential_gradient!, x_seed) @test od.f == exponential @test od.g! == exponential_gradient! - @test value(od) == f_x_seed - @test od.last_x_f == [0.0, 0.0] - @test od.f_calls == [1] - @test od.g_calls == [1] + @test value(odc) == f_x_seed + @test odc.last_x_f == [0.0, 0.0] + @test odc.f_calls == [1] + @test odc.g_calls == [1] - td = TwiceDifferentiable(exponential, exponential_gradient!, exponential_hessian!, x_seed) - td_new = TwiceDifferentiable(td, x_seed) + td = TwiceDifferentiable(exponential, exponential_gradient!, exponential_hessian!) + tdc = TwiceDifferentiableCache(exponential, exponential_gradient!, exponential_hessian!, x_seed) @test td.f == exponential @test td.g! == exponential_gradient! - @test value(td) == f_x_seed - @test td.last_x_f == [0.0, 0.0] - @test td.f_calls == [1] - @test td.g_calls == [1] - @test td.h_calls == [1] - - td_from_td = TwiceDifferentiable(td, x_seed .- 1) - @test value(td_from_td) == value(td, x_seed .- 1) - gradient!(td, x_seed .- 1) - @test gradient(td_from_td) == gradient(td) - @test value(td_from_td, x_seed) == value(td, x_seed) + @test value(tdc) == f_x_seed + @test tdc.last_x_f == [0.0, 0.0] + @test tdc.f_calls == [1] + @test tdc.g_calls == [1] + @test tdc.h_calls == [1] end