Merge pull request #97 from sebapersson/main
Allow custom gradient and Hessian in OptimizationFunction
DanielVandH committed Jan 23, 2024
2 parents 2268df9 + 6501492 commit d2b7bd1
Showing 6 changed files with 294 additions and 20 deletions.
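
In short, this PR propagates a user-supplied gradient and Hessian through the problem rewrites performed during profiling: when a parameter is fixed, the new wrappers evaluate the full derivatives and then drop the fixed entries, so the reduced problem keeps consistent grad and hess functions. A minimal sketch of the user-facing pattern (the analytic derivatives below are illustrative only and not part of this commit):

```julia
using Optimization

# Rosenbrock objective with analytic derivatives written out by hand.
rosenbrock(x, p) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
function rosenbrock_grad!(g, x, p)
    g[1] = -2.0 * (1.0 - x[1]) - 400.0 * x[1] * (x[2] - x[1]^2)
    g[2] = 200.0 * (x[2] - x[1]^2)
    return nothing
end
function rosenbrock_hess!(H, x, p)
    H[1, 1] = 2.0 - 400.0 * x[2] + 1200.0 * x[1]^2
    H[1, 2] = -400.0 * x[1]
    H[2, 1] = -400.0 * x[1]
    H[2, 2] = 200.0
    return nothing
end

# With this PR, the grad/hess keywords are carried through to the profiled subproblems.
f = OptimizationFunction(rosenbrock; grad = rosenbrock_grad!, hess = rosenbrock_hess!)
prob = OptimizationProblem(f, [0.5, 0.5]; lb = [-1.0, -1.0], ub = [0.8, 0.8])
```

The test added in test/custom_hessian.jl below does the equivalent with ForwardDiff-backed closures and checks that the resulting confidence intervals match the AD path.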
180 changes: 163 additions & 17 deletions src/problem_updates.jl
@@ -2,21 +2,21 @@ update_initial_estimate(prob::OptimizationProblem, θ) = remake(prob; u0=θ)
update_initial_estimate(prob::OptimizationProblem, sol::SciMLBase.OptimizationSolution) = update_initial_estimate(prob, sol.u)

# replace obj with a new obj
@inline function replace_objective_function(f::OF, new_f::FF) where {iip,AD,F,G,H,HV,C,CJ,CH,HP,CJP,CHP,S,S2,O,EX,CEX,SYS,LH,LHP,HCV,CJCV,CHCV,LHCV,OF<:OptimizationFunction{iip,AD,F,G,H,HV,C,CJ,CH,HP,CJP,CHP,S,S2,O,EX,CEX,SYS,LH,LHP,HCV,CJCV,CHCV,LHCV},FF}
@inline function replace_objective_function(f::OF, new_f::FF, new_grad!, new_hess!) where {OF, FF}
# scimlbase needs to add a constructorof method for OptimizationFunction before we can just do @set with accessors.jl.
# g = @set f.f = new_f
# return g
return OptimizationFunction{iip,AD,FF,G,H,HV,C,CJ,CH,HP,CJP,CHP,S,S2,O,EX,CEX,SYS,LH,LHP,HCV,CJCV,CHCV,LHCV}(
return OptimizationFunction(
new_f,
f.adtype,
f.grad,
f.hess,
f.hv,
f.cons,
f.cons_j,
f.cons_h,
f.hess_prototype,
f.cons_jac_prototype,
f.adtype;
grad=new_grad!,
hess=new_hess!,
hv=f.hv,
cons=f.cons,
cons_j=f.cons_j,
cons_h=f.cons_h,
hess_prototype=f.hess_prototype,
cons_jac_prototype=f.cons_jac_prototype,
f.cons_hess_prototype,
f.syms,
f.paramsyms,
@@ -33,14 +33,17 @@ update_initial_estimate(prob::OptimizationProblem, sol::SciMLBase.OptimizationSo
)
end

@inline function replace_objective_function(prob::F, obj::FF) where {F<:OptimizationProblem,FF}
g = replace_objective_function(prob.f, obj)
@inline function replace_objective_function(prob::F, obj::FF, grad, hess) where {F<:OptimizationProblem, FF}
g = replace_objective_function(prob.f, obj, grad, hess)
return remake(prob; f=g)
end

# fix the objective function's nth parameter at θₙ
function construct_fixed_optimisation_function(prob::OptimizationProblem, n::Integer, θₙ, cache)
original_f = prob.f
original_grad! = prob.f.grad
original_hess! = prob.f.hess

new_f = @inline (θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
@@ -54,7 +57,72 @@ function construct_fixed_optimisation_function(prob::OptimizationProblem, n::Int
end
return original_f(cache2, p)
end
return replace_objective_function(prob, new_f)

# If a custom gradient is provided, the new gradient function computes the full gradient,
# but the entry for parameter n is dropped
if isnothing(original_grad!)
# Default for an OptimizationProblem
new_grad! = nothing
else
new_grad! = (g, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n
cache2[i] = θ[i]
elseif i > n
cache2[i] = θ[i-1]
else
cache2[n] = θₙ
end
end
_g = similar(cache2)
original_grad!(_g, cache2, p)
for i in eachindex(cache2)
if i < n
g[i] = _g[i]
elseif i > n
g[i-1] = _g[i]
end
end
return nothing
end
end

# Likewise, if a custom Hessian is provided, the full Hessian is computed,
# but the row and column for parameter n are dropped
if isnothing(original_hess!)
# Default for an OptimizationProblem
new_hess! = nothing
else
new_hess! = (H, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n
cache2[i] = θ[i]
elseif i > n
cache2[i] = θ[i-1]
else
cache2[n] = θₙ
end
end
_H = Matrix{eltype(cache2)}(undef, length(cache2), length(cache2))
original_hess!(_H, cache2, p)
for i in eachindex(cache2)
for j in eachindex(cache2)
if i == n || j == n
continue
else
ix = i < n ? i : i - 1
jx = j < n ? j : j - 1
H[ix, jx] = _H[i, j]
end
end
end
return nothing
end
end

return replace_objective_function(prob, new_f, new_grad!, new_hess!)
end

# fix the objective function's (n1, n2) parameters at (θn1, θn2)
@@ -66,6 +134,9 @@ function construct_fixed_optimisation_function(prob::OptimizationProblem, n::NTu
θₙ₁, θₙ₂ = θₙ₂, θₙ₁
end
original_f = prob.f
original_grad! = prob.f.grad
original_hess! = prob.f.hess

new_f = @inline (θ, p) -> begin
cache2 = get_tmp(cache, θ)
@inbounds for i in eachindex(cache2)
@@ -83,9 +154,84 @@ end
end
return original_f(cache2, p)
end
return replace_objective_function(prob, new_f)

# If a custom gradient is provided, the new gradient function computes the full gradient,
# but the entries for parameters n₁ and n₂ are dropped
if isnothing(original_grad!)
# Default for an OptimizationProblem
new_grad! = nothing
else
new_grad! = (g, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n₁
cache2[i] = θ[i]
elseif i == n₁
cache2[i] = θₙ₁
elseif n₁ < i < n₂
cache2[i] = θ[i-1]
elseif i == n₂
cache2[i] = θₙ₂
elseif i > n₂
cache2[i] = θ[i-2]
end
end
_g = similar(cache2)
original_grad!(_g, cache2, p)
for i in eachindex(cache2)
if i == n₁ || i == n₂
continue
else
ix = i < n₁ ? i : (i < n₂ ? i - 1 : i - 2)
g[ix] = _g[i]
end
end
return nothing
end
end

# Likewise, if a custom Hessian is provided, the full Hessian is computed,
# but the rows and columns for parameters n₁ and n₂ are dropped
if isnothing(original_hess!)
# Default for an OptimizationProblem
new_hess! = nothing
else
new_hess! = (H, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n₁
cache2[i] = θ[i]
elseif i == n₁
cache2[i] = θₙ₁
elseif n₁ < i < n₂
cache2[i] = θ[i-1]
elseif i == n₂
cache2[i] = θₙ₂
elseif i > n₂
cache2[i] = θ[i-2]
end
end
_H = Matrix{eltype(cache2)}(undef, length(cache2), length(cache2))
original_hess!(_H, cache2, p)
for i in eachindex(cache2)
for j in eachindex(cache2)
if (i == n₁ || j == n₁) || (i == n₂ || j == n₂)
continue
else
ix = i < n₁ ? i : (i < n₂ ? i - 1 : i - 2)
jx = j < n₁ ? j : (j < n₂ ? j - 1 : j - 2)
H[ix, jx] = _H[i, j]
end
end
end
return nothing
end
end

return replace_objective_function(prob, new_f, new_grad!, new_hess!)
end


# remove lower bounds, upper bounds, and also remove the nth value from the initial estimate
function exclude_parameter(prob::OptimizationProblem, n::Integer)
!has_bounds(prob) && return update_initial_estimate(prob, prob.u0[Not(n)])
@@ -106,7 +252,7 @@ end

# replace obj by obj - shift
function shift_objective_function(prob::OptimizationProblem, shift)
original_f = prob.f
original_f = prob.f.f
new_f = @inline (θ, p) -> original_f(θ, p) - shift
return replace_objective_function(prob, new_f)
return replace_objective_function(prob, new_f, prob.f.grad, prob.f.hess)
end
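
The heart of the change above is the index remapping inside the new gradient and Hessian wrappers: the user's derivative is evaluated on the full parameter vector (with the fixed value reinserted at position n), and the fixed row/column is then dropped. A standalone sketch of that remapping, with hypothetical names and separate from the code in this commit:

```julia
# Hypothetical helpers illustrating the remapping performed by the wrappers above.

# Drop entry n from a full-length gradient to get the reduced gradient.
function drop_fixed_entry(g_full::AbstractVector, n::Integer)
    g_reduced = similar(g_full, length(g_full) - 1)
    for i in eachindex(g_full)
        if i < n
            g_reduced[i] = g_full[i]
        elseif i > n
            g_reduced[i-1] = g_full[i]
        end
    end
    return g_reduced
end

# Drop row and column n from a full Hessian, shifting later indices down.
function drop_fixed_entry(H_full::AbstractMatrix, n::Integer)
    m = size(H_full, 1) - 1
    H_reduced = similar(H_full, m, m)
    for i in axes(H_full, 1), j in axes(H_full, 2)
        (i == n || j == n) && continue
        H_reduced[i < n ? i : i - 1, j < n ? j : j - 1] = H_full[i, j]
    end
    return H_reduced
end

drop_fixed_entry([1.0, 2.0, 3.0], 2)      # [1.0, 3.0]
drop_fixed_entry([1.0 2.0; 3.0 4.0], 1)   # 1×1 matrix containing 4.0
```

The in-place versions in the diff do the same thing but write directly into the optimiser-provided g and H buffers, and the two-parameter version applies the shift twice (i - 1 past n₁, i - 2 past n₂).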
3 changes: 3 additions & 0 deletions test/Project.toml
@@ -9,10 +9,12 @@ DelaunayTriangulation = "927a84f5-c5f4-47a5-9785-b46e178433df"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
FiniteVolumeMethod = "d4f04ab7-4f65-4d72-8a28-7087bc7f46f4"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
FunctionWrappers = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
LatinHypercubeSampling = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
@@ -22,6 +24,7 @@ MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
OptimizationMOI = "fd9f6733-72f4-499f-8506-86b2bdd0dea1"
OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb"
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
60 changes: 60 additions & 0 deletions test/custom_hessian.jl
@@ -0,0 +1,60 @@
using Optimization
using ProfileLikelihood
using Ipopt
using OptimizationMOI
using ForwardDiff
using Test


rosenbrock(x, p) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
_∇f = (G, u, p) -> begin
p = Float64[]
_f = (x) -> rosenbrock(x, p)
ForwardDiff.gradient!(G, _f, u)

end
_Δf = (H, u, p) -> begin
p = Float64[]
_f = (x) -> rosenbrock(x, p)
ForwardDiff.hessian!(H, _f, u)
end

x0 = zeros(2) .+ 0.5
xnames = [:x1, :x2]
optimization_function = Optimization.OptimizationFunction(rosenbrock;
grad = _∇f,
hess = _Δf,
syms=xnames)
prob = OptimizationProblem(optimization_function, x0, lb = [-1.0, -1.0], ub = [0.8, 0.8])
_likelihood_problem = LikelihoodProblem{length(x0),
typeof(prob),
typeof(SciMLBase.NullParameters()),
typeof(prob.f),
typeof(x0),
typeof(xnames)}(prob,
SciMLBase.NullParameters(),
prob.f,
x0,
xnames)
sol = mle(_likelihood_problem, Ipopt.Optimizer())
prof1 = profile(_likelihood_problem, sol)

# Trusting the AD
x0 = zeros(2) .+ 0.5
optimization_function = Optimization.OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();
syms=xnames)
prob = OptimizationProblem(optimization_function, x0, lb = [-1.0, -1.0], ub = [0.8, 0.8])
_likelihood_problem = LikelihoodProblem{length(x0),
typeof(prob),
typeof(SciMLBase.NullParameters()),
typeof(prob.f),
typeof(x0),
typeof(xnames)}(prob,
SciMLBase.NullParameters(),
prob.f,
x0,
xnames)
sol = mle(_likelihood_problem, Ipopt.Optimizer())
prof2 = profile(_likelihood_problem, sol)

@test prof1.confidence_intervals == prof2.confidence_intervals