# conjugate_gradient_descent.jl
@doc raw"""
    conjugate_gradient_descent(M, F, gradF, x)

Perform a conjugate gradient based descent

````math
x_{k+1} = \operatorname{retr}_{x_k} \bigl( s_kδ_k \bigr),
````

where ``\operatorname{retr}`` denotes a retraction on the `Manifold` `M`
and one can employ different rules to update the descent direction ``δ_k`` based on
the last direction ``δ_{k-1}`` and both gradients ``\operatorname{grad}f(x_k)``, ``\operatorname{grad}f(x_{k-1})``.
The [`Stepsize`](@ref) ``s_k`` may be determined by a [`Linesearch`](@ref).

Available update rules are [`SteepestDirectionUpdateRule`](@ref), which yields a [`gradient_descent`](@ref),
[`ConjugateDescentCoefficient`](@ref) (the default), [`DaiYuanCoefficient`](@ref), [`FletcherReevesCoefficient`](@ref),
[`HagerZhangCoefficient`](@ref), [`HeestenesStiefelCoefficient`](@ref),
[`LiuStoreyCoefficient`](@ref), and [`PolakRibiereCoefficient`](@ref).
They all compute ``β_k`` such that this algorithm updates the search direction as

````math
\delta_k=\operatorname{grad}f(x_k) + β_k \delta_{k-1}
````

# Input
* `M` : a manifold ``\mathcal M``
* `F` : a cost function ``F:\mathcal M→ℝ`` to minimize
* `gradF`: the gradient ``\operatorname{grad}F:\mathcal M → T\mathcal M`` of ``F``
* `x` : an initial value ``x∈\mathcal M``

# Optional
* `coefficient` : ([`ConjugateDescentCoefficient`](@ref) `<:` [`DirectionUpdateRule`](@ref))
  rule to compute the descent direction update coefficient ``β_k``,
  as a functor, i.e. the resulting function maps `(p,o,i) -> β`, where
  `p` is the current [`GradientProblem`](@ref), `o` are the
  [`ConjugateGradientDescentOptions`](@ref) `o` and `i` is the current iterate.
* `evaluation` – ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by
  allocation (default), i.e. is of the form `gradF(M, x)`,
  or [`MutatingEvaluation`](@ref) in place, i.e. is of the form `gradF!(M, X, x)`.
* `retraction_method` - (`default_retraction_method(M)`) a retraction method to use.
* `return_options` – (`false`) – if activated, the extended result, i.e. the
  complete [`Options`](@ref) are returned. This can be used to access recorded values.
  If set to `false` (default) just the optimal value `x_opt` is returned.
* `stepsize` - (`ConstantStepsize(M)`) A [`Stepsize`](@ref) function applied to the
  search direction. The default is a constant step size 1.
* `stopping_criterion` : (`StopWhenAny(StopAfterIteration(500), StopWhenGradientNormLess(1e-8))`)
  a functor indicating when to stop.
* `vector_transport_method` – (`default_vector_transport_method(M)`) vector transport method to transport
  the old descent direction when computing the new descent direction.

# Output
* `x_opt` – the resulting (approximately critical) point of the conjugate gradient descent
OR
* `options` - the options returned by the solver (see `return_options`)
"""
function conjugate_gradient_descent(
    M::AbstractManifold, F::TF, gradF::TDF, x; kwargs...
) where {TF,TDF}
    x_res = copy(M, x)
    return conjugate_gradient_descent!(M, F, gradF, x_res; kwargs...)
end
@doc raw"""
    conjugate_gradient_descent!(M, F, gradF, x)

Perform a conjugate gradient based descent in place of `x`, i.e.

````math
x_{k+1} = \operatorname{retr}_{x_k} \bigl( s_k\delta_k \bigr),
````

where ``\operatorname{retr}`` denotes a retraction on the `Manifold` `M`.

# Input
* `M` : a manifold ``\mathcal M``
* `F` : a cost function ``F:\mathcal M→ℝ`` to minimize
* `gradF`: the gradient ``\operatorname{grad}F:\mathcal M→ T\mathcal M`` of F
* `x` : an initial value ``x∈\mathcal M``

for more details and options, especially the [`DirectionUpdateRule`](@ref)s,
see [`conjugate_gradient_descent`](@ref).
"""
function conjugate_gradient_descent!(
    M::AbstractManifold,
    F::TF,
    gradF::TDF,
    x;
    coefficient::DirectionUpdateRule=ConjugateDescentCoefficient(),
    stepsize::Stepsize=ConstantStepsize(M),
    retraction_method::AbstractRetractionMethod=default_retraction_method(M),
    stopping_criterion::StoppingCriterion=StopWhenAny(
        # Use the float literal 1e-8 here: the former `10^(-8)` raises a
        # DomainError (integer base to a negative integer power), so the
        # default stopping criterion crashed on construction.
        StopAfterIteration(500), StopWhenGradientNormLess(1e-8)
    ),
    vector_transport_method=default_vector_transport_method(M),
    return_options=false,
    kwargs...,
) where {TF,TDF}
    # Bundle cost and gradient into the problem, allocate an initial
    # (zero) gradient tangent vector, and set up the solver state.
    p = GradientProblem(M, F, gradF)
    X = zero_vector(M, x)
    o = ConjugateGradientDescentOptions(
        M,
        x,
        stopping_criterion,
        stepsize,
        coefficient,
        retraction_method,
        vector_transport_method,
        X,
    )
    # Wrap the options with any decorators requested via kwargs (debug, record, …).
    o = decorate_options(o; kwargs...)
    resultO = solve(p, o)
    if return_options
        return resultO
    end
    return get_solver_result(resultO)
end
# Prepare the state for the first iteration: evaluate the gradient at the
# start point, descend along its negative, and reset the coefficient β.
function initialize_solver!(p::GradientProblem, o::ConjugateGradientDescentOptions)
    grad = get_gradient(p, o.x)
    o.gradient = grad
    o.δ = -grad
    o.β = 0.0
    return 0.0
end
# Perform one conjugate gradient iteration: step along δ, evaluate the new
# gradient, compute the coefficient β, and update the search direction.
function step_solver!(p::GradientProblem, o::ConjugateGradientDescentOptions, i)
    # Snapshot the current iterate by value. The former `xOld = o.x` merely
    # aliased the point that `retract!` mutates in place, so the vector
    # transport below started at the *new* iterate instead of the previous
    # one and the old direction was never actually transported.
    xOld = copy(p.M, o.x)
    retract!(p.M, o.x, o.x, get_stepsize(p, o, i, o.δ) * o.δ, o.retraction_method)
    get_gradient!(p, o.gradient, o.x)
    o.β = o.coefficient(p, o, i)
    # Move the previous direction from the old iterate to the new one, then
    # combine it with the new (negative) gradient: δ ← -grad f(x) + β δ.
    vector_transport_to!(p.M, o.δ, xOld, o.δ, o.x, o.vector_transport_method)
    o.δ .= -o.gradient .+ o.β * o.δ
    return o.δ
end
# The solver result is simply the final iterate stored in the options.
function get_solver_result(o::ConjugateGradientDescentOptions)
    return o.x
end