Merge pull request #97 from sebapersson/main
Allow custom gradient and Hessian in OptimizationFunction
DanielVandH committed Jan 23, 2024
2 parents 2268df9 + 6501492 commit d2b7bd1
Showing 6 changed files with 294 additions and 20 deletions.
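
In short, this PR propagates a user-supplied gradient and Hessian through the problem rewrites performed during profiling: when a parameter is fixed, the new wrappers evaluate the full derivatives and then drop the fixed entries, so the reduced problem keeps consistent grad and hess functions. A minimal sketch of the user-facing pattern (the analytic derivatives below are illustrative only and not part of this commit):

```julia
using Optimization

# Rosenbrock objective with analytic derivatives written out by hand.
rosenbrock(x, p) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
function rosenbrock_grad!(g, x, p)
    g[1] = -2.0 * (1.0 - x[1]) - 400.0 * x[1] * (x[2] - x[1]^2)
    g[2] = 200.0 * (x[2] - x[1]^2)
    return nothing
end
function rosenbrock_hess!(H, x, p)
    H[1, 1] = 2.0 - 400.0 * x[2] + 1200.0 * x[1]^2
    H[1, 2] = -400.0 * x[1]
    H[2, 1] = -400.0 * x[1]
    H[2, 2] = 200.0
    return nothing
end

# With this PR, the grad/hess keywords are carried through to the profiled subproblems.
f = OptimizationFunction(rosenbrock; grad = rosenbrock_grad!, hess = rosenbrock_hess!)
prob = OptimizationProblem(f, [0.5, 0.5]; lb = [-1.0, -1.0], ub = [0.8, 0.8])
```

The test added in test/custom_hessian.jl below does the equivalent with ForwardDiff-backed closures and checks that the resulting confidence intervals match the AD path.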
180 changes: 163 additions & 17 deletions src/problem_updates.jl
@@ -2,21 +2,21 @@ update_initial_estimate(prob::OptimizationProblem, θ) = remake(prob; u0=θ)
update_initial_estimate(prob::OptimizationProblem, sol::SciMLBase.OptimizationSolution) = update_initial_estimate(prob, sol.u)

# replace obj with a new obj
@inline function replace_objective_function(f::OF, new_f::FF) where {iip,AD,F,G,H,HV,C,CJ,CH,HP,CJP,CHP,S,S2,O,EX,CEX,SYS,LH,LHP,HCV,CJCV,CHCV,LHCV,OF<:OptimizationFunction{iip,AD,F,G,H,HV,C,CJ,CH,HP,CJP,CHP,S,S2,O,EX,CEX,SYS,LH,LHP,HCV,CJCV,CHCV,LHCV},FF}
@inline function replace_objective_function(f::OF, new_f::FF, new_grad!, new_hess!) where {OF, FF}
# scimlbase needs to add a constructorof method for OptimizationFunction before we can just do @set with accessors.jl.
# g = @set f.f = new_f
# return g
return OptimizationFunction{iip,AD,FF,G,H,HV,C,CJ,CH,HP,CJP,CHP,S,S2,O,EX,CEX,SYS,LH,LHP,HCV,CJCV,CHCV,LHCV}(
return OptimizationFunction(
new_f,
f.adtype,
f.grad,
f.hess,
f.hv,
f.cons,
f.cons_j,
f.cons_h,
f.hess_prototype,
f.cons_jac_prototype,
f.adtype;
grad=new_grad!,
hess=new_hess!,
hv=f.hv,
cons=f.cons,
cons_j=f.cons_j,
cons_h=f.cons_h,
hess_prototype=f.hess_prototype,
cons_jac_prototype=f.cons_jac_prototype,
f.cons_hess_prototype,
f.syms,
f.paramsyms,
@@ -33,14 +33,17 @@ update_initial_estimate(prob::OptimizationProblem, sol::SciMLBase.OptimizationSo
)
end

@inline function replace_objective_function(prob::F, obj::FF) where {F<:OptimizationProblem,FF}
g = replace_objective_function(prob.f, obj)
@inline function replace_objective_function(prob::F, obj::FF, grad, hess) where {F<:OptimizationProblem, FF}
g = replace_objective_function(prob.f, obj, grad, hess)
return remake(prob; f=g)
end

# fix the objective function's nth parameter at θₙ
function construct_fixed_optimisation_function(prob::OptimizationProblem, n::Integer, θₙ, cache)
original_f = prob.f
original_grad! = prob.f.grad
original_hess! = prob.f.hess

new_f = @inline (θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
@@ -54,7 +57,72 @@ function construct_fixed_optimisation_function(prob::OptimizationProblem, n::Int
end
return original_f(cache2, p)
end
return replace_objective_function(prob, new_f)

# If a custom gradient is provided, the new gradient function computes the full gradient,
# but the entry for parameter n is dropped
if isnothing(original_grad!)
# Default for an OptimizationProblem
new_grad! = nothing
else
new_grad! = (g, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n
cache2[i] = θ[i]
elseif i > n
cache2[i] = θ[i-1]
else
cache2[n] = θₙ
end
end
_g = similar(cache2)
original_grad!(_g, cache2, p)
for i in eachindex(cache2)
if i < n
g[i] = _g[i]
elseif i > n
g[i-1] = _g[i]
end
end
return nothing
end
end

# Likewise, if a custom Hessian is provided, the full Hessian is computed,
# but the row and column for parameter n are dropped
if isnothing(original_hess!)
# Default for an OptimizationProblem
new_hess! = nothing
else
new_hess! = (H, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n
cache2[i] = θ[i]
elseif i > n
cache2[i] = θ[i-1]
else
cache2[n] = θₙ
end
end
_H = Matrix{eltype(cache2)}(undef, length(cache2), length(cache2))
original_hess!(_H, cache2, p)
for i in eachindex(cache2)
for j in eachindex(cache2)
if i == n || j == n
continue
else
ix = i < n ? i : i - 1
jx = j < n ? j : j - 1
H[ix, jx] = _H[i, j]
end
end
end
return nothing
end
end

return replace_objective_function(prob, new_f, new_grad!, new_hess!)
end

# fix the objective function's (n1, n2) parameters at (θn1, θn2)
@@ -66,6 +134,9 @@ function construct_fixed_optimisation_function(prob::OptimizationProblem, n::NTu
θₙ₁, θₙ₂ = θₙ₂, θₙ₁
end
original_f = prob.f
original_grad! = prob.f.grad
original_hess! = prob.f.hess

new_f = @inline (θ, p) -> begin
cache2 = get_tmp(cache, θ)
@inbounds for i in eachindex(cache2)
@@ -83,9 +154,84 @@ end
end
return original_f(cache2, p)
end
return replace_objective_function(prob, new_f)

# If a custom gradient is provided, the new gradient function computes the full gradient,
# but the entries for parameters n₁ and n₂ are dropped
if isnothing(original_grad!)
# Default for an OptimizationProblem
new_grad! = nothing
else
new_grad! = (g, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n₁
cache2[i] = θ[i]
elseif i == n₁
cache2[i] = θₙ₁
elseif n₁ < i < n₂
cache2[i] = θ[i-1]
elseif i == n₂
cache2[i] = θₙ₂
elseif i > n₂
cache2[i] = θ[i-2]
end
end
_g = similar(cache2)
original_grad!(_g, cache2, p)
for i in eachindex(cache2)
if i == n₁ || i == n₂
continue
else
ix = i < n₁ ? i : (i < n₂ ? i - 1 : i - 2)
g[ix] = _g[i]
end
end
return nothing
end
end

# Likewise, if a custom Hessian is provided, the full Hessian is computed,
# but the rows and columns for parameters n₁ and n₂ are dropped
if isnothing(original_hess!)
# Default for an OptimizationProblem
new_hess! = nothing
else
new_hess! = (H, θ, p) -> begin
cache2 = get_tmp(cache, θ)
for i in eachindex(cache2)
if i < n₁
cache2[i] = θ[i]
elseif i == n₁
cache2[i] = θₙ₁
elseif n₁ < i < n₂
cache2[i] = θ[i-1]
elseif i == n₂
cache2[i] = θₙ₂
elseif i > n₂
cache2[i] = θ[i-2]
end
end
_H = Matrix{eltype(cache2)}(undef, length(cache2), length(cache2))
original_hess!(_H, cache2, p)
for i in eachindex(cache2)
for j in eachindex(cache2)
if (i == n₁ || j == n₁) || (i == n₂ || j == n₂)
continue
else
ix = i < n₁ ? i : (i < n₂ ? i - 1 : i - 2)
jx = j < n₁ ? j : (j < n₂ ? j - 1 : j - 2)
H[ix, jx] = _H[i, j]
end
end
end
return nothing
end
end

return replace_objective_function(prob, new_f, new_grad!, new_hess!)
end


# remove lower bounds, upper bounds, and also remove the nth value from the initial estimate
function exclude_parameter(prob::OptimizationProblem, n::Integer)
!has_bounds(prob) && return update_initial_estimate(prob, prob.u0[Not(n)])
@@ -106,7 +252,7 @@ end

# replace obj by obj - shift
function shift_objective_function(prob::OptimizationProblem, shift)
original_f = prob.f
original_f = prob.f.f
new_f = @inline (θ, p) -> original_f(θ, p) - shift
return replace_objective_function(prob, new_f)
return replace_objective_function(prob, new_f, prob.f.grad, prob.f.hess)
end
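
The heart of the change above is the index remapping inside the new gradient and Hessian wrappers: the user's derivative is evaluated on the full parameter vector (with the fixed value reinserted at position n), and the fixed row/column is then dropped. A standalone sketch of that remapping, with hypothetical names and separate from the code in this commit:

```julia
# Hypothetical helpers illustrating the remapping performed by the wrappers above.

# Drop entry n from a full-length gradient to get the reduced gradient.
function drop_fixed_entry(g_full::AbstractVector, n::Integer)
    g_reduced = similar(g_full, length(g_full) - 1)
    for i in eachindex(g_full)
        if i < n
            g_reduced[i] = g_full[i]
        elseif i > n
            g_reduced[i-1] = g_full[i]
        end
    end
    return g_reduced
end

# Drop row and column n from a full Hessian, shifting later indices down.
function drop_fixed_entry(H_full::AbstractMatrix, n::Integer)
    m = size(H_full, 1) - 1
    H_reduced = similar(H_full, m, m)
    for i in axes(H_full, 1), j in axes(H_full, 2)
        (i == n || j == n) && continue
        H_reduced[i < n ? i : i - 1, j < n ? j : j - 1] = H_full[i, j]
    end
    return H_reduced
end

drop_fixed_entry([1.0, 2.0, 3.0], 2)      # [1.0, 3.0]
drop_fixed_entry([1.0 2.0; 3.0 4.0], 1)   # 1×1 matrix containing 4.0
```

The in-place versions in the diff do the same thing but write directly into the optimiser-provided g and H buffers, and the two-parameter version applies the shift twice (i - 1 past n₁, i - 2 past n₂).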
3 changes: 3 additions & 0 deletions test/Project.toml
@@ -9,10 +9,12 @@ DelaunayTriangulation = "927a84f5-c5f4-47a5-9785-b46e178433df"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
FiniteVolumeMethod = "d4f04ab7-4f65-4d72-8a28-7087bc7f46f4"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
FunctionWrappers = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
LatinHypercubeSampling = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
@@ -22,6 +24,7 @@ MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
OptimizationMOI = "fd9f6733-72f4-499f-8506-86b2bdd0dea1"
OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb"
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
60 changes: 60 additions & 0 deletions test/custom_hessian.jl
@@ -0,0 +1,60 @@
using Optimization
using ProfileLikelihood
using Ipopt
using OptimizationMOI
using ForwardDiff
using Test


rosenbrock(x, p) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
_∇f = (G, u, p) -> begin
p = Float64[]
_f = (x) -> rosenbrock(x, p)
ForwardDiff.gradient!(G, _f, u)

end
_Δf = (H, u, p) -> begin
p = Float64[]
_f = (x) -> rosenbrock(x, p)
ForwardDiff.hessian!(H, _f, u)
end

x0 = zeros(2) .+ 0.5
xnames = [:x1, :x2]
optimization_function = Optimization.OptimizationFunction(rosenbrock;
grad = _∇f,
hess = _Δf,
syms=xnames)
prob = OptimizationProblem(optimization_function, x0, lb = [-1.0, -1.0], ub = [0.8, 0.8])
_likelihood_problem = LikelihoodProblem{length(x0),
typeof(prob),
typeof(SciMLBase.NullParameters()),
typeof(prob.f),
typeof(x0),
typeof(xnames)}(prob,
SciMLBase.NullParameters(),
prob.f,
x0,
xnames)
sol = mle(_likelihood_problem, Ipopt.Optimizer())
prof1 = profile(_likelihood_problem, sol)

# Trusting the AD
x0 = zeros(2) .+ 0.5
optimization_function = Optimization.OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();
syms=xnames)
prob = OptimizationProblem(optimization_function, x0, lb = [-1.0, -1.0], ub = [0.8, 0.8])
_likelihood_problem = LikelihoodProblem{length(x0),
typeof(prob),
typeof(SciMLBase.NullParameters()),
typeof(prob.f),
typeof(x0),
typeof(xnames)}(prob,
SciMLBase.NullParameters(),
prob.f,
x0,
xnames)
sol = mle(_likelihood_problem, Ipopt.Optimizer())
prof2 = profile(_likelihood_problem, sol)

@test prof1.confidence_intervals == prof2.confidence_intervals