From 9b4c7b37e6fd2911d1fab77ebeabd450a039712b Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 14 Aug 2019 22:29:09 +0200 Subject: [PATCH] Fix Anderson for inputs of arbitrary dimension (#223) * Use Walker's defaults for m and droptol * Remove Anderson struct * Check if values are finite * Fix Anderson acceleration for inputs of arbitrary dimension * Remove where clauses * Add test * Fix typo --- src/nlsolve/nlsolve.jl | 16 +++--- src/nlsolve/utils.jl | 4 +- src/solvers/anderson.jl | 111 +++++++++++++++++++++------------------ test/2by2.jl | 5 ++ test/abstractarray.jl | 9 ++-- test/iface.jl | 2 +- test/interface/caches.jl | 2 +- 7 files changed, 82 insertions(+), 67 deletions(-) diff --git a/src/nlsolve/nlsolve.jl b/src/nlsolve/nlsolve.jl index bd7a6fd..96bd740 100644 --- a/src/nlsolve/nlsolve.jl +++ b/src/nlsolve/nlsolve.jl @@ -11,10 +11,10 @@ function nlsolve(df::TDF, linsolve=(x, A, b) -> copyto!(x, A\b), factor::Real = one(real(T)), autoscale::Bool = true, - m::Integer = 0, + m::Integer = 10, beta::Real = 1, aa_start::Integer = 1, - droptol::Real = 0) where {T, TDF <: Union{NonDifferentiable, OnceDifferentiable}} + droptol::Real = convert(real(T), 1e10)) where {T, TDF <: Union{NonDifferentiable, OnceDifferentiable}} if show_trace @printf "Iter f(x) inf-norm Step 2-norm \n" @printf "------ -------------- --------------\n" @@ -49,10 +49,10 @@ function nlsolve(f, linesearch = LineSearches.Static(), factor::Real = one(real(T)), autoscale::Bool = true, - m::Integer = 0, + m::Integer = 10, beta::Real = 1, aa_start::Integer = 1, - droptol::Real = 0, + droptol::Real = convert(real(T), 1e10), autodiff = :central, linsolve=(x, A, b) -> copyto!(x, A\b), inplace = !applicable(f, initial_x)) where T @@ -92,10 +92,10 @@ function nlsolve(f, linesearch = LineSearches.Static(), factor::Real = one(real(T)), autoscale::Bool = true, - m::Integer = 0, + m::Integer = 10, beta::Real = 1, aa_start::Integer = 1, - droptol::Real = 0, + droptol::Real = convert(real(T), 1e10), inplace = !applicable(f, initial_x), linsolve=(x, A, b) -> copyto!(x, A\b)) where T if inplace @@ -125,10 +125,10 @@ function nlsolve(f, linesearch = LineSearches.Static(), factor::Real = one(real(T)), autoscale::Bool = true, - m::Integer = 0, + m::Integer = 10, beta::Real = 1, aa_start::Integer = 1, - droptol::Real = 0, + droptol::Real = convert(real(T), 1e10), inplace = !applicable(f, initial_x), linsolve=(x, A, b) -> copyto!(x, A\b)) where T if inplace diff --git a/src/nlsolve/utils.jl b/src/nlsolve/utils.jl index 1abb374..5607c3e 100644 --- a/src/nlsolve/utils.jl +++ b/src/nlsolve/utils.jl @@ -30,8 +30,8 @@ function assess_convergence(x, end function check_isfinite(x::AbstractArray) - if any((y)->!isfinite(y),x) - i = findall((!).(isfinite.(x))) + if any(!isfinite, x) + i = findall(!isfinite, x) throw(IsFiniteException(i)) end end diff --git a/src/solvers/anderson.jl b/src/solvers/anderson.jl index 60dc122..87a798e 100644 --- a/src/solvers/anderson.jl +++ b/src/solvers/anderson.jl @@ -1,8 +1,6 @@ # Notations from Walker & Ni, "Anderson acceleration for fixed-point iterations", SINUM 2011 # Attempts to accelerate the iteration xₙ₊₁ = xₙ + beta*f(xₙ) -struct Anderson{m} end - struct AndersonCache{Tx,To,Tdg,Tg,TQ,TR} <: AbstractSolverCache x::Tx g::Tx @@ -14,34 +12,40 @@ struct AndersonCache{Tx,To,Tdg,Tg,TQ,TR} <: AbstractSolverCache R::TR end -function AndersonCache(df, ::Anderson{m}) where m +function AndersonCache(df, m) x = similar(df.x_f) g = similar(x) - fxold = similar(x) - gold = similar(x) - - # maximum size of history - mmax = min(length(x), m) - - # buffer storing the differences between g of the iterates, from oldest to newest - Δgs = [similar(x) for _ in 1:mmax] - - T = eltype(x) - γs = Vector{T}(undef, mmax) # coefficients obtained from the least-squares problem - - # matrices for QR decomposition - Q = Matrix{T}(undef, length(x), mmax) - R = Matrix{T}(undef, mmax, mmax) + if m > 0 + fxold = similar(x) + gold = similar(x) + + # maximum size of history + mmax = min(length(x), m) + + # buffer storing the differences between g of the iterates, from oldest to newest + Δgs = [similar(x) for _ in 1:mmax] + + T = eltype(x) + γs = Vector{T}(undef, mmax) # coefficients obtained from the least-squares problem + + # matrices for QR decomposition + Q = Matrix{T}(undef, length(x), mmax) + R = Matrix{T}(undef, mmax, mmax) + else + fxold = nothing + gold = nothing + Δgs = nothing + γs = nothing + Q = nothing + R = nothing + end AndersonCache(x, g, fxold, gold, Δgs, γs, Q, R) end -AndersonCache(df, ::Anderson{0}) = - AndersonCache(similar(df.x_f), similar(df.x_f), nothing, nothing, nothing, nothing, nothing, nothing) - @views function anderson_(df::Union{NonDifferentiable, OnceDifferentiable}, - initial_x::AbstractArray{T}, + initial_x::AbstractArray, xtol::Real, ftol::Real, iterations::Integer, @@ -51,7 +55,7 @@ AndersonCache(df, ::Anderson{0}) = beta::Real, aa_start::Integer, droptol::Real, - cache::AndersonCache) where T + cache::AndersonCache) copyto!(cache.x, initial_x) tr = SolverTrace() tracing = store_trace || show_trace || extended_trace @@ -65,9 +69,14 @@ AndersonCache(df, ::Anderson{0}) = while iter < iterations iter += 1 - # fixed-point iteration + # evaluate function value!!(df, cache.x) fx = value(df) + + # check that all values are finite + check_isfinite(fx) + + # compute next iterate of fixed-point iteration @. cache.g = cache.x + beta * fx # save trace @@ -80,7 +89,7 @@ AndersonCache(df, ::Anderson{0}) = update!(tr, iter, maximum(abs, fx), - iter > 1 ? sqeuclidean(cache.g, cache.x) : convert(real(T),NaN), + iter > 1 ? sqeuclidean(cache.g, cache.x) : convert(real(eltype(initial_x)), NaN), dt, store_trace, show_trace) @@ -90,7 +99,7 @@ AndersonCache(df, ::Anderson{0}) = x_converged, f_converged, converged = assess_convergence(cache.g, cache.x, fx, xtol, ftol) converged && break - # define next iterate + # update current iterate copyto!(cache.x, cache.g) # perform Anderson acceleration @@ -144,7 +153,7 @@ AndersonCache(df, ::Anderson{0}) = # solve least squares problem γs = view(cache.γs, 1:m_eff) - ldiv!(R, mul!(γs, Q', fx)) + ldiv!(R, mul!(γs, Q', vec(fx))) # update next iterate for i in 1:m_eff @@ -161,31 +170,31 @@ AndersonCache(df, ::Anderson{0}) = end function anderson(df::Union{NonDifferentiable, OnceDifferentiable}, - initial_x::AbstractArray, - xtol::Real, - ftol::Real, - iterations::Integer, - store_trace::Bool, - show_trace::Bool, - extended_trace::Bool, - m::Integer, - beta::Real, - aa_start::Integer, - droptol::Real) - anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, Anderson{m}())) + initial_x::AbstractArray, + xtol::Real, + ftol::Real, + iterations::Integer, + store_trace::Bool, + show_trace::Bool, + extended_trace::Bool, + m::Integer, + beta::Real, + aa_start::Integer, + droptol::Real) + anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, m)) end function anderson(df::Union{NonDifferentiable, OnceDifferentiable}, - initial_x::AbstractArray{T}, - xtol::Real, - ftol::Real, - iterations::Integer, - store_trace::Bool, - show_trace::Bool, - extended_trace::Bool, - beta::Real, - aa_start::Integer, - droptol::Real, - cache::AndersonCache) where T - anderson_(df, initial_x, convert(real(T), xtol), convert(real(T), ftol), iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, cache) + initial_x::AbstractArray, + xtol::Real, + ftol::Real, + iterations::Integer, + store_trace::Bool, + show_trace::Bool, + extended_trace::Bool, + beta::Real, + aa_start::Integer, + droptol::Real, + cache::AndersonCache) + anderson_(df, initial_x, convert(real(eltype(initial_x)), xtol), convert(real(eltype(initial_x)), ftol), iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, cache) end diff --git a/test/2by2.jl b/test/2by2.jl index f939676..b0d0148 100644 --- a/test/2by2.jl +++ b/test/2by2.jl @@ -72,4 +72,9 @@ r = nlsolve(df, [ -0.5; 1.4], method = :newton, linesearch = LineSearches.Strong r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 10, beta=.01) @test converged(r) @test norm(r.zero - [ 0; 1]) < 1e-8 +@test r.iterations == 9 + +# without Anderson acceleration fixed-point iteration does not converge in 1_000 iterations +r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 0, beta=.01) +@test !converged(r) end diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 9ed2842..be15969 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -37,8 +37,8 @@ initial_x = vec(initial_x_matrix) #initial_x_wrapped = WrappedArray(initial_x_matrix) for method in (:trust_region, :newton, :anderson, :broyden) - r = nlsolve(f!, j!, initial_x, method = method) - r_matrix = nlsolve(f!, j!, initial_x_matrix, method = method) + r = nlsolve(f!, j!, initial_x, method = method, m = 2, beta = 1e-3) + r_matrix = nlsolve(f!, j!, initial_x_matrix, method = method, m = 2, beta = 1e-3) #r_wrapped = nlsolve(f!, j!, initial_x_wrapped, method = method) @test r.zero == vec(r_matrix.zero) @@ -48,8 +48,9 @@ for method in (:trust_region, :newton, :anderson, :broyden) @test typeof(r.zero) == typeof(initial_x) @test typeof(r_matrix.zero) == typeof(initial_x_matrix) #@test typeof(r_wrapped.zero) == typeof(initial_x_wrapped) - r_AD = nlsolve(f!, initial_x, method = method, autodiff = :forward) - r_matrix_AD = nlsolve(f!, initial_x_matrix, method = method, autodiff = :forward) + + r_AD = nlsolve(f!, initial_x, method = method, m = 2, beta = 1e-3, autodiff = :forward) + r_matrix_AD = nlsolve(f!, initial_x_matrix, method = method, m = 2, beta = 1e-3, autodiff = :forward) #r_wrapped_AD = nlsolve(f!, initial_x_wrapped, method = method, autodiff = :forward) @test r_AD.zero == vec(r_matrix_AD.zero) diff --git a/test/iface.jl b/test/iface.jl index e5653c6..0bdefd7 100644 --- a/test/iface.jl +++ b/test/iface.jl @@ -106,7 +106,7 @@ r = nlsolve(n_ary(f), [ -0.5; 1.4]) r = nlsolve(n_ary(f), [ -0.5; 1.4], autodiff = :forward) @test converged(r) -@testset "andersont trace issue #160" begin +@testset "anderson trace issue #160" begin function f_2by2!(F, x) F[1] = (x[1]+3)*(x[2]^3-7)+18 diff --git a/test/interface/caches.jl b/test/interface/caches.jl index ca03dca..6947db6 100644 --- a/test/interface/caches.jl +++ b/test/interface/caches.jl @@ -15,7 +15,7 @@ ncp = copy(nc.p) r = NLsolve.newton(df, [ 1.; 1.], 0.1, 0.1, 100, false, false, false, LineSearches.Static(), nc) @test !(ncp == nc.p) -ac = NLsolve.AndersonCache(df, NLsolve.Anderson{10}()) +ac = NLsolve.AndersonCache(df, 10) ac.x .= 22.0 acx = copy(ac.x) r = NLsolve.anderson(df, [ 1.; 1.], 0.1, 0.1, 100, false, false, false, 0.9, 1, 0, ac)