Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Anderson for inputs of arbitrary dimension #223

Merged
merged 7 commits into from Aug 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/nlsolve/nlsolve.jl
Expand Up @@ -11,10 +11,10 @@ function nlsolve(df::TDF,
linsolve=(x, A, b) -> copyto!(x, A\b),
factor::Real = one(real(T)),
autoscale::Bool = true,
m::Integer = 0,
m::Integer = 10,
beta::Real = 1,
aa_start::Integer = 1,
droptol::Real = 0) where {T, TDF <: Union{NonDifferentiable, OnceDifferentiable}}
droptol::Real = convert(real(T), 1e10)) where {T, TDF <: Union{NonDifferentiable, OnceDifferentiable}}
if show_trace
@printf "Iter f(x) inf-norm Step 2-norm \n"
@printf "------ -------------- --------------\n"
Expand Down Expand Up @@ -49,10 +49,10 @@ function nlsolve(f,
linesearch = LineSearches.Static(),
factor::Real = one(real(T)),
autoscale::Bool = true,
m::Integer = 0,
m::Integer = 10,
beta::Real = 1,
aa_start::Integer = 1,
droptol::Real = 0,
droptol::Real = convert(real(T), 1e10),
autodiff = :central,
linsolve=(x, A, b) -> copyto!(x, A\b),
inplace = !applicable(f, initial_x)) where T
Expand Down Expand Up @@ -92,10 +92,10 @@ function nlsolve(f,
linesearch = LineSearches.Static(),
factor::Real = one(real(T)),
autoscale::Bool = true,
m::Integer = 0,
m::Integer = 10,
beta::Real = 1,
aa_start::Integer = 1,
droptol::Real = 0,
droptol::Real = convert(real(T), 1e10),
inplace = !applicable(f, initial_x),
linsolve=(x, A, b) -> copyto!(x, A\b)) where T
if inplace
Expand Down Expand Up @@ -125,10 +125,10 @@ function nlsolve(f,
linesearch = LineSearches.Static(),
factor::Real = one(real(T)),
autoscale::Bool = true,
m::Integer = 0,
m::Integer = 10,
beta::Real = 1,
aa_start::Integer = 1,
droptol::Real = 0,
droptol::Real = convert(real(T), 1e10),
inplace = !applicable(f, initial_x),
linsolve=(x, A, b) -> copyto!(x, A\b)) where T
if inplace
Expand Down
4 changes: 2 additions & 2 deletions src/nlsolve/utils.jl
Expand Up @@ -30,8 +30,8 @@ function assess_convergence(x,
end

function check_isfinite(x::AbstractArray)
if any((y)->!isfinite(y),x)
i = findall((!).(isfinite.(x)))
if any(!isfinite, x)
i = findall(!isfinite, x)
throw(IsFiniteException(i))
end
end
Expand Down
111 changes: 60 additions & 51 deletions src/solvers/anderson.jl
@@ -1,8 +1,6 @@
# Notations from Walker & Ni, "Anderson acceleration for fixed-point iterations", SINUM 2011
# Attempts to accelerate the iteration xₙ₊₁ = xₙ + beta*f(xₙ)

struct Anderson{m} end

struct AndersonCache{Tx,To,Tdg,Tg,TQ,TR} <: AbstractSolverCache
x::Tx
g::Tx
Expand All @@ -14,34 +12,40 @@ struct AndersonCache{Tx,To,Tdg,Tg,TQ,TR} <: AbstractSolverCache
R::TR
end

function AndersonCache(df, ::Anderson{m}) where m
function AndersonCache(df, m)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this was done to have m be known to the compiler

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, the current implementation has the same type instability problem, it's just better hidden. In

anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, Anderson{m}()))

the creation of Anderson{m} is also type unstable.

As far as I understand, both these instabilities in the current and the new implementation should not be a problem, since the type instability should not affect the output type when calling nlsolve and after the first call of Anderson{m} and AndersonCache(df, m) everything can be inferred. To me it seems, the current implementation is just more complicated without providing any benefits.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I retract my objection then!

x = similar(df.x_f)
g = similar(x)

fxold = similar(x)
gold = similar(x)

# maximum size of history
mmax = min(length(x), m)

# buffer storing the differences between g of the iterates, from oldest to newest
Δgs = [similar(x) for _ in 1:mmax]

T = eltype(x)
γs = Vector{T}(undef, mmax) # coefficients obtained from the least-squares problem

# matrices for QR decomposition
Q = Matrix{T}(undef, length(x), mmax)
R = Matrix{T}(undef, mmax, mmax)
if m > 0
fxold = similar(x)
gold = similar(x)

# maximum size of history
mmax = min(length(x), m)

# buffer storing the differences between g of the iterates, from oldest to newest
Δgs = [similar(x) for _ in 1:mmax]

T = eltype(x)
γs = Vector{T}(undef, mmax) # coefficients obtained from the least-squares problem

# matrices for QR decomposition
Q = Matrix{T}(undef, length(x), mmax)
R = Matrix{T}(undef, mmax, mmax)
else
fxold = nothing
gold = nothing
Δgs = nothing
γs = nothing
Q = nothing
R = nothing
end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unstable. Not sure if that's a big problem in practice since small type unions are supposed to be fast these days, and anyway Anderson is mostly useful for large scale in which the overhead doesn't matter...


AndersonCache(x, g, fxold, gold, Δgs, γs, Q, R)
end

AndersonCache(df, ::Anderson{0}) =
AndersonCache(similar(df.x_f), similar(df.x_f), nothing, nothing, nothing, nothing, nothing, nothing)

@views function anderson_(df::Union{NonDifferentiable, OnceDifferentiable},
initial_x::AbstractArray{T},
initial_x::AbstractArray,
xtol::Real,
ftol::Real,
iterations::Integer,
Expand All @@ -51,7 +55,7 @@ AndersonCache(df, ::Anderson{0}) =
beta::Real,
aa_start::Integer,
droptol::Real,
cache::AndersonCache) where T
cache::AndersonCache)
copyto!(cache.x, initial_x)
tr = SolverTrace()
tracing = store_trace || show_trace || extended_trace
Expand All @@ -65,9 +69,14 @@ AndersonCache(df, ::Anderson{0}) =
while iter < iterations
iter += 1

# fixed-point iteration
# evaluate function
value!!(df, cache.x)
fx = value(df)

# check that all values are finite
check_isfinite(fx)

# compute next iterate of fixed-point iteration
@. cache.g = cache.x + beta * fx

# save trace
Expand All @@ -80,7 +89,7 @@ AndersonCache(df, ::Anderson{0}) =
update!(tr,
iter,
maximum(abs, fx),
iter > 1 ? sqeuclidean(cache.g, cache.x) : convert(real(T),NaN),
iter > 1 ? sqeuclidean(cache.g, cache.x) : convert(real(eltype(initial_x)), NaN),
dt,
store_trace,
show_trace)
Expand All @@ -90,7 +99,7 @@ AndersonCache(df, ::Anderson{0}) =
x_converged, f_converged, converged = assess_convergence(cache.g, cache.x, fx, xtol, ftol)
converged && break

# define next iterate
# update current iterate
copyto!(cache.x, cache.g)

# perform Anderson acceleration
Expand Down Expand Up @@ -144,7 +153,7 @@ AndersonCache(df, ::Anderson{0}) =

# solve least squares problem
γs = view(cache.γs, 1:m_eff)
ldiv!(R, mul!(γs, Q', fx))
ldiv!(R, mul!(γs, Q', vec(fx)))

# update next iterate
for i in 1:m_eff
Expand All @@ -161,31 +170,31 @@ AndersonCache(df, ::Anderson{0}) =
end

function anderson(df::Union{NonDifferentiable, OnceDifferentiable},
initial_x::AbstractArray,
xtol::Real,
ftol::Real,
iterations::Integer,
store_trace::Bool,
show_trace::Bool,
extended_trace::Bool,
m::Integer,
beta::Real,
aa_start::Integer,
droptol::Real)
anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, Anderson{m}()))
initial_x::AbstractArray,
xtol::Real,
ftol::Real,
iterations::Integer,
store_trace::Bool,
show_trace::Bool,
extended_trace::Bool,
m::Integer,
beta::Real,
aa_start::Integer,
droptol::Real)
anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, m))
end

function anderson(df::Union{NonDifferentiable, OnceDifferentiable},
initial_x::AbstractArray{T},
xtol::Real,
ftol::Real,
iterations::Integer,
store_trace::Bool,
show_trace::Bool,
extended_trace::Bool,
beta::Real,
aa_start::Integer,
droptol::Real,
cache::AndersonCache) where T
anderson_(df, initial_x, convert(real(T), xtol), convert(real(T), ftol), iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, cache)
initial_x::AbstractArray,
xtol::Real,
ftol::Real,
iterations::Integer,
store_trace::Bool,
show_trace::Bool,
extended_trace::Bool,
beta::Real,
aa_start::Integer,
droptol::Real,
cache::AndersonCache)
anderson_(df, initial_x, convert(real(eltype(initial_x)), xtol), convert(real(eltype(initial_x)), ftol), iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, cache)
end
5 changes: 5 additions & 0 deletions test/2by2.jl
Expand Up @@ -72,4 +72,9 @@ r = nlsolve(df, [ -0.5; 1.4], method = :newton, linesearch = LineSearches.Strong
r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 10, beta=.01)
@test converged(r)
@test norm(r.zero - [ 0; 1]) < 1e-8
@test r.iterations == 9

# without Anderson acceleration fixed-point iteration does not converge in 1_000 iterations
r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 0, beta=.01)
@test !converged(r)
end
9 changes: 5 additions & 4 deletions test/abstractarray.jl
Expand Up @@ -37,8 +37,8 @@ initial_x = vec(initial_x_matrix)
#initial_x_wrapped = WrappedArray(initial_x_matrix)

for method in (:trust_region, :newton, :anderson, :broyden)
r = nlsolve(f!, j!, initial_x, method = method)
r_matrix = nlsolve(f!, j!, initial_x_matrix, method = method)
r = nlsolve(f!, j!, initial_x, method = method, m = 2, beta = 1e-3)
r_matrix = nlsolve(f!, j!, initial_x_matrix, method = method, m = 2, beta = 1e-3)
#r_wrapped = nlsolve(f!, j!, initial_x_wrapped, method = method)

@test r.zero == vec(r_matrix.zero)
Expand All @@ -48,8 +48,9 @@ for method in (:trust_region, :newton, :anderson, :broyden)
@test typeof(r.zero) == typeof(initial_x)
@test typeof(r_matrix.zero) == typeof(initial_x_matrix)
#@test typeof(r_wrapped.zero) == typeof(initial_x_wrapped)
r_AD = nlsolve(f!, initial_x, method = method, autodiff = :forward)
r_matrix_AD = nlsolve(f!, initial_x_matrix, method = method, autodiff = :forward)

r_AD = nlsolve(f!, initial_x, method = method, m = 2, beta = 1e-3, autodiff = :forward)
r_matrix_AD = nlsolve(f!, initial_x_matrix, method = method, m = 2, beta = 1e-3, autodiff = :forward)
#r_wrapped_AD = nlsolve(f!, initial_x_wrapped, method = method, autodiff = :forward)

@test r_AD.zero == vec(r_matrix_AD.zero)
Expand Down
2 changes: 1 addition & 1 deletion test/iface.jl
Expand Up @@ -106,7 +106,7 @@ r = nlsolve(n_ary(f), [ -0.5; 1.4])
r = nlsolve(n_ary(f), [ -0.5; 1.4], autodiff = :forward)
@test converged(r)

@testset "andersont trace issue #160" begin
@testset "anderson trace issue #160" begin

function f_2by2!(F, x)
F[1] = (x[1]+3)*(x[2]^3-7)+18
Expand Down
2 changes: 1 addition & 1 deletion test/interface/caches.jl
Expand Up @@ -15,7 +15,7 @@ ncp = copy(nc.p)
r = NLsolve.newton(df, [ 1.; 1.], 0.1, 0.1, 100, false, false, false, LineSearches.Static(), nc)
@test !(ncp == nc.p)

ac = NLsolve.AndersonCache(df, NLsolve.Anderson{10}())
ac = NLsolve.AndersonCache(df, 10)
ac.x .= 22.0
acx = copy(ac.x)
r = NLsolve.anderson(df, [ 1.; 1.], 0.1, 0.1, 100, false, false, false, 0.9, 1, 0, ac)
Expand Down