-
Notifications
You must be signed in to change notification settings - Fork 67
/
trust_region.jl
188 lines (166 loc) · 5.76 KB
/
trust_region.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
macro trustregiontrace(stepnorm)
quote
if tracing
dt = Dict()
if extended_trace
dt["x"] = copy(x)
dt["f(x)"] = copy(r)
dt["g(x)"] = copy(J)
dt["delta"] = delta
dt["rho"] = rho
end
update!(tr,
it,
maximum(abs(r)),
$stepnorm,
dt,
store_trace,
show_trace)
end
end
end
function dogleg!{T}(p::Vector{T}, r::Vector{T}, d2::Vector{T}, J::AbstractMatrix{T}, delta::Real)
local p_i
try
p_i = J \ r # Gauss-Newton step
catch e
if isa(e, Base.LinAlg.LAPACKException) || isa(e, Base.LinAlg.SingularException)
# If Jacobian is singular, compute a least-squares solution to J*x+r=0
U, S, V = svd(full(J)) # Convert to full matrix because sparse SVD not implemented as of Julia 0.3
k = sum(S .> eps())
mrinv = V * diagm([1./S[1:k]; zeros(eltype(S), length(S)-k)]) * U' # Moore-Penrose generalized inverse of J
p_i = mrinv * r
else
throw(e)
end
end
scale!(p_i, -one(T))
# Test if Gauss-Newton step is within the region
if wnorm(d2, p_i) <= delta
copy!(p, p_i)
else
p_c = At_mul_B(J, r) # Gradient direction
broadcast!(/, p_c, p_c, d2)
scale!(p_c, - wnorm(d2, p_c)^2 / sumabs2(J * p_c)) # Cauchy point
if wnorm(d2, p_c) >= delta
# Cauchy point is out of the region, take the largest step along
# gradient direction
scale!(p, p_c, delta / wnorm(d2, p_c))
else
p_diff = Base.BLAS.axpy!(-one(T), p_c, p_i)
# Compute the optimal point on dogleg path
b = 2 * wdot(d2, p_c, p_diff)
a = wdot(d2, p_diff, p_diff)
c = wdot(d2, p_c, p_c)
tau = (-b + sqrt(b^2 - 4 * a * (c - delta^2)))/(2*a)
copy!(p, p_c)
Base.BLAS.axpy!(tau, p_diff, p)
end
end
end
function trust_region_{T}(df::AbstractDifferentiableMultivariateFunction,
initial_x::Vector{T},
xtol::T,
ftol::T,
iterations::Integer,
store_trace::Bool,
show_trace::Bool,
extended_trace::Bool,
factor::T,
autoscale::Bool)
x = copy(initial_x) # Current point
nn = length(x)
xold = fill(convert(T, NaN), nn) # Old point
r = similar(x) # Current residual
r_new = similar(x) # New residual
r_predict = similar(x) # predicted residual
p = similar(x) # Step
d2 = similar(x) # Scaling vector
J = alloc_jacobian(df, T, nn) # Jacobian
# Count function calls
f_calls, g_calls = 0, 0
df.fg!(x, r, J)
f_calls += 1
g_calls += 1
check_isfinite(r)
it = 0
x_converged, f_converged, converged = assess_convergence(x, xold, r, xtol, ftol)
delta = convert(T, NaN)
rho = convert(T, NaN)
tr = SolverTrace()
tracing = store_trace || show_trace || extended_trace
@trustregiontrace convert(T, NaN)
if autoscale
for j = 1:nn
d2[j] = sumabs2(view(J, :, j))
if d2[j] == zero(T)
d2[j] = one(T)
end
end
else
fill!(d2, one(T))
end
delta = factor * wnorm(d2, x)
if delta == 0
delta = factor
end
eta = convert(T, 1e-4)
while !converged && it < iterations
it += 1
# Compute proposed iteration step
dogleg!(p, r, d2, J, delta)
copy!(xold, x)
Base.BLAS.axpy!(one(T), p, x)
df.f!(x, r_new)
f_calls += 1
# Ratio of actual to predicted reduction
A_mul_B!(r_predict, J, p)
Base.BLAS.axpy!(one(T), r, r_predict)
rho = (sumabs2(r) - sumabs2(r_new)) / (sumabs2(r) - sumabs2(r_predict))
if rho > eta
# Successful iteration
copy!(r, r_new)
df.g!(x, J)
g_calls += 1
# Update scaling vector
if autoscale
for j = 1:nn
d2[j] = max(convert(T, 0.01) * d2[j], sumabs2(view(J, :, j)))
end
end
x_converged, f_converged, converged = assess_convergence(x, xold, r, xtol, ftol)
else
Base.BLAS.axpy!(-one(T), p, x)
x_converged, converged = false, false
end
@trustregiontrace sqeuclidean(x, xold)
# Update size of trust region
if rho < 0.1
delta = delta/2
elseif rho >= 0.9
delta = 2 * wnorm(d2, p)
elseif rho >= 0.5
delta = max(delta, 2 * wnorm(d2, p))
end
end
name = "Trust-region with dogleg"
if autoscale
name *= " and autoscaling"
end
return SolverResults(name,
initial_x, x, norm(r, Inf),
it, x_converged, xtol, f_converged, ftol, tr,
f_calls, g_calls)
end
function trust_region{T}(df::AbstractDifferentiableMultivariateFunction,
initial_x::Vector{T},
xtol::Real,
ftol::Real,
iterations::Integer,
store_trace::Bool,
show_trace::Bool,
extended_trace::Bool,
factor::Real,
autoscale::Bool)
trust_region_(df, initial_x, convert(T,xtol), convert(T,ftol), iterations, store_trace, show_trace, extended_trace, convert(T,factor), autoscale)
end