From 9b4c7b37e6fd2911d1fab77ebeabd450a039712b Mon Sep 17 00:00:00 2001
From: David Widmann <devmotion@users.noreply.github.com>
Date: Wed, 14 Aug 2019 22:29:09 +0200
Subject: [PATCH] Fix Anderson for inputs of arbitrary dimension (#223)

* Use Walker's defaults for m and droptol

* Remove Anderson struct

* Check if values are finite

* Fix Anderson acceleration for inputs of arbitrary dimension

* Remove where clauses

* Add test

* Fix typo
---
 src/nlsolve/nlsolve.jl   |  16 +++---
 src/nlsolve/utils.jl     |   4 +-
 src/solvers/anderson.jl  | 111 +++++++++++++++++++++------------------
 test/2by2.jl             |   5 ++
 test/abstractarray.jl    |   9 ++--
 test/iface.jl            |   2 +-
 test/interface/caches.jl |   2 +-
 7 files changed, 82 insertions(+), 67 deletions(-)

diff --git a/src/nlsolve/nlsolve.jl b/src/nlsolve/nlsolve.jl
index bd7a6fd..96bd740 100644
--- a/src/nlsolve/nlsolve.jl
+++ b/src/nlsolve/nlsolve.jl
@@ -11,10 +11,10 @@ function nlsolve(df::TDF,
                  linsolve=(x, A, b) -> copyto!(x, A\b),
                  factor::Real = one(real(T)),
                  autoscale::Bool = true,
-                 m::Integer = 0,
+                 m::Integer = 10,
                  beta::Real = 1,
                  aa_start::Integer = 1,
-                 droptol::Real = 0) where {T, TDF <: Union{NonDifferentiable, OnceDifferentiable}}
+                 droptol::Real = convert(real(T), 1e10)) where {T, TDF <: Union{NonDifferentiable, OnceDifferentiable}}
     if show_trace
         @printf "Iter     f(x) inf-norm    Step 2-norm \n"
         @printf "------   --------------   --------------\n"
@@ -49,10 +49,10 @@ function nlsolve(f,
                  linesearch = LineSearches.Static(),
                  factor::Real = one(real(T)),
                  autoscale::Bool = true,
-                 m::Integer = 0,
+                 m::Integer = 10,
                  beta::Real = 1,
                  aa_start::Integer = 1,
-                 droptol::Real = 0,
+                 droptol::Real = convert(real(T), 1e10),
                  autodiff = :central,
                  linsolve=(x, A, b) -> copyto!(x, A\b),
                  inplace = !applicable(f, initial_x)) where T
@@ -92,10 +92,10 @@ function nlsolve(f,
                 linesearch = LineSearches.Static(),
                 factor::Real = one(real(T)),
                 autoscale::Bool = true,
-                m::Integer = 0,
+                m::Integer = 10,
                 beta::Real = 1,
                 aa_start::Integer = 1,
-                droptol::Real = 0,
+                droptol::Real = convert(real(T), 1e10),
                 inplace = !applicable(f, initial_x),
                 linsolve=(x, A, b) -> copyto!(x, A\b)) where T
     if inplace
@@ -125,10 +125,10 @@ function nlsolve(f,
                 linesearch = LineSearches.Static(),
                 factor::Real = one(real(T)),
                 autoscale::Bool = true,
-                m::Integer = 0,
+                m::Integer = 10,
                 beta::Real = 1,
                 aa_start::Integer = 1,
-                droptol::Real = 0,
+                droptol::Real = convert(real(T), 1e10),
                 inplace = !applicable(f, initial_x),
                 linsolve=(x, A, b) -> copyto!(x, A\b)) where T
     if inplace
diff --git a/src/nlsolve/utils.jl b/src/nlsolve/utils.jl
index 1abb374..5607c3e 100644
--- a/src/nlsolve/utils.jl
+++ b/src/nlsolve/utils.jl
@@ -30,8 +30,8 @@ function assess_convergence(x,
 end
 
 function check_isfinite(x::AbstractArray)
-    if any((y)->!isfinite(y),x)
-        i = findall((!).(isfinite.(x)))
+    if any(!isfinite, x)
+        i = findall(!isfinite, x)
         throw(IsFiniteException(i))
     end
 end
diff --git a/src/solvers/anderson.jl b/src/solvers/anderson.jl
index 60dc122..87a798e 100644
--- a/src/solvers/anderson.jl
+++ b/src/solvers/anderson.jl
@@ -1,8 +1,6 @@
 # Notations from Walker & Ni, "Anderson acceleration for fixed-point iterations", SINUM 2011
 # Attempts to accelerate the iteration xₙ₊₁ = xₙ + beta*f(xₙ)
 
-struct Anderson{m} end
-
 struct AndersonCache{Tx,To,Tdg,Tg,TQ,TR} <: AbstractSolverCache
     x::Tx
     g::Tx
@@ -14,34 +12,40 @@ struct AndersonCache{Tx,To,Tdg,Tg,TQ,TR} <: AbstractSolverCache
     R::TR
 end
 
-function AndersonCache(df, ::Anderson{m}) where m
+function AndersonCache(df, m)
     x = similar(df.x_f)
     g = similar(x)
 
-    fxold = similar(x)
-    gold = similar(x)
-
-    # maximum size of history
-    mmax = min(length(x), m)
-
-    # buffer storing the differences between g of the iterates, from oldest to newest
-    Δgs = [similar(x) for _ in 1:mmax]
-
-    T = eltype(x)
-    γs = Vector{T}(undef, mmax) # coefficients obtained from the least-squares problem
-
-    # matrices for QR decomposition
-    Q = Matrix{T}(undef, length(x), mmax)
-    R = Matrix{T}(undef, mmax, mmax)
+    if m > 0
+        fxold = similar(x)
+        gold = similar(x)
+
+        # maximum size of history
+        mmax = min(length(x), m)
+
+        # buffer storing the differences between g of the iterates, from oldest to newest
+        Δgs = [similar(x) for _ in 1:mmax]
+
+        T = eltype(x)
+        γs = Vector{T}(undef, mmax) # coefficients obtained from the least-squares problem
+
+        # matrices for QR decomposition
+        Q = Matrix{T}(undef, length(x), mmax)
+        R = Matrix{T}(undef, mmax, mmax)
+    else
+        fxold = nothing
+        gold = nothing
+        Δgs = nothing
+        γs = nothing
+        Q = nothing
+        R = nothing
+    end
 
     AndersonCache(x, g, fxold, gold, Δgs, γs, Q, R)
 end
 
-AndersonCache(df, ::Anderson{0}) =
-    AndersonCache(similar(df.x_f), similar(df.x_f), nothing, nothing, nothing, nothing, nothing, nothing)
-
 @views function anderson_(df::Union{NonDifferentiable, OnceDifferentiable},
-                             initial_x::AbstractArray{T},
+                             initial_x::AbstractArray,
                              xtol::Real,
                              ftol::Real,
                              iterations::Integer,
@@ -51,7 +55,7 @@ AndersonCache(df, ::Anderson{0}) =
                              beta::Real,
                              aa_start::Integer,
                              droptol::Real,
-                             cache::AndersonCache) where T
+                             cache::AndersonCache)
     copyto!(cache.x, initial_x)
     tr = SolverTrace()
     tracing = store_trace || show_trace || extended_trace
@@ -65,9 +69,14 @@ AndersonCache(df, ::Anderson{0}) =
     while iter < iterations
         iter += 1
 
-        # fixed-point iteration
+        # evaluate function
         value!!(df, cache.x)
         fx = value(df)
+
+        # check that all values are finite
+        check_isfinite(fx)
+
+        # compute next iterate of fixed-point iteration
         @. cache.g = cache.x + beta * fx
 
         # save trace
@@ -80,7 +89,7 @@ AndersonCache(df, ::Anderson{0}) =
             update!(tr,
                     iter,
                     maximum(abs, fx),
-                    iter > 1 ? sqeuclidean(cache.g, cache.x) : convert(real(T),NaN),
+                    iter > 1 ? sqeuclidean(cache.g, cache.x) : convert(real(eltype(initial_x)), NaN),
                     dt,
                     store_trace,
                     show_trace)
@@ -90,7 +99,7 @@ AndersonCache(df, ::Anderson{0}) =
         x_converged, f_converged, converged = assess_convergence(cache.g, cache.x, fx, xtol, ftol)
         converged && break
 
-        # define next iterate
+        # update current iterate
         copyto!(cache.x, cache.g)
 
         # perform Anderson acceleration
@@ -144,7 +153,7 @@ AndersonCache(df, ::Anderson{0}) =
 
                 # solve least squares problem
                 γs = view(cache.γs, 1:m_eff)
-                ldiv!(R, mul!(γs, Q', fx))
+                ldiv!(R, mul!(γs, Q', vec(fx)))
 
                 # update next iterate
                 for i in 1:m_eff
@@ -161,31 +170,31 @@ AndersonCache(df, ::Anderson{0}) =
 end
 
 function anderson(df::Union{NonDifferentiable, OnceDifferentiable},
-                     initial_x::AbstractArray,
-                     xtol::Real,
-                     ftol::Real,
-                     iterations::Integer,
-                     store_trace::Bool,
-                     show_trace::Bool,
-                     extended_trace::Bool,
-                     m::Integer,
-                     beta::Real,
-                     aa_start::Integer,
-                     droptol::Real)
-    anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, Anderson{m}()))
+                  initial_x::AbstractArray,
+                  xtol::Real,
+                  ftol::Real,
+                  iterations::Integer,
+                  store_trace::Bool,
+                  show_trace::Bool,
+                  extended_trace::Bool,
+                  m::Integer,
+                  beta::Real,
+                  aa_start::Integer,
+                  droptol::Real)
+    anderson(df, initial_x, xtol, ftol, iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, AndersonCache(df, m))
 end
 
 function anderson(df::Union{NonDifferentiable, OnceDifferentiable},
-                     initial_x::AbstractArray{T},
-                     xtol::Real,
-                     ftol::Real,
-                     iterations::Integer,
-                     store_trace::Bool,
-                     show_trace::Bool,
-                     extended_trace::Bool,
-                     beta::Real,
-                     aa_start::Integer,
-                     droptol::Real,
-                     cache::AndersonCache) where T
-    anderson_(df, initial_x, convert(real(T), xtol), convert(real(T), ftol), iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, cache)
+                  initial_x::AbstractArray,
+                  xtol::Real,
+                  ftol::Real,
+                  iterations::Integer,
+                  store_trace::Bool,
+                  show_trace::Bool,
+                  extended_trace::Bool,
+                  beta::Real,
+                  aa_start::Integer,
+                  droptol::Real,
+                  cache::AndersonCache)
+    anderson_(df, initial_x, convert(real(eltype(initial_x)), xtol), convert(real(eltype(initial_x)), ftol), iterations, store_trace, show_trace, extended_trace, beta, aa_start, droptol, cache)
 end
diff --git a/test/2by2.jl b/test/2by2.jl
index f939676..b0d0148 100644
--- a/test/2by2.jl
+++ b/test/2by2.jl
@@ -72,4 +72,9 @@ r = nlsolve(df, [ -0.5; 1.4], method = :newton, linesearch = LineSearches.Strong
 r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 10, beta=.01)
 @test converged(r)
 @test norm(r.zero - [ 0; 1]) < 1e-8
+@test r.iterations == 9
+
+# without Anderson acceleration fixed-point iteration does not converge in 1_000 iterations
+r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 0, beta=.01)
+@test !converged(r)
 end
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index 9ed2842..be15969 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -37,8 +37,8 @@ initial_x = vec(initial_x_matrix)
 #initial_x_wrapped = WrappedArray(initial_x_matrix)
 
 for method in (:trust_region, :newton, :anderson, :broyden)
-    r = nlsolve(f!, j!, initial_x, method = method)
-    r_matrix = nlsolve(f!, j!, initial_x_matrix, method = method)
+    r = nlsolve(f!, j!, initial_x, method = method, m = 2, beta = 1e-3)
+    r_matrix = nlsolve(f!, j!, initial_x_matrix, method = method, m = 2, beta = 1e-3)
     #r_wrapped = nlsolve(f!, j!, initial_x_wrapped, method = method)
 
     @test r.zero == vec(r_matrix.zero)
@@ -48,8 +48,9 @@ for method in (:trust_region, :newton, :anderson, :broyden)
     @test typeof(r.zero) == typeof(initial_x)
     @test typeof(r_matrix.zero) == typeof(initial_x_matrix)
     #@test typeof(r_wrapped.zero) == typeof(initial_x_wrapped)
-    r_AD = nlsolve(f!, initial_x, method = method, autodiff = :forward)
-    r_matrix_AD = nlsolve(f!, initial_x_matrix, method = method, autodiff = :forward)
+
+    r_AD = nlsolve(f!, initial_x, method = method, m = 2, beta = 1e-3, autodiff = :forward)
+    r_matrix_AD = nlsolve(f!, initial_x_matrix, method = method, m = 2, beta = 1e-3, autodiff = :forward)
     #r_wrapped_AD = nlsolve(f!, initial_x_wrapped, method = method, autodiff = :forward)
 
     @test r_AD.zero == vec(r_matrix_AD.zero)
diff --git a/test/iface.jl b/test/iface.jl
index e5653c6..0bdefd7 100644
--- a/test/iface.jl
+++ b/test/iface.jl
@@ -106,7 +106,7 @@ r = nlsolve(n_ary(f), [ -0.5; 1.4])
 r = nlsolve(n_ary(f), [ -0.5; 1.4], autodiff = :forward)
 @test converged(r)
 
-@testset "andersont trace issue #160" begin
+@testset "anderson trace issue #160" begin
 
     function f_2by2!(F, x)
         F[1] = (x[1]+3)*(x[2]^3-7)+18
diff --git a/test/interface/caches.jl b/test/interface/caches.jl
index ca03dca..6947db6 100644
--- a/test/interface/caches.jl
+++ b/test/interface/caches.jl
@@ -15,7 +15,7 @@ ncp = copy(nc.p)
 r = NLsolve.newton(df, [ 1.; 1.], 0.1, 0.1, 100, false, false, false, LineSearches.Static(), nc)
 @test !(ncp == nc.p)
 
-ac = NLsolve.AndersonCache(df, NLsolve.Anderson{10}())
+ac = NLsolve.AndersonCache(df, 10)
 ac.x .= 22.0
 acx = copy(ac.x)
 r = NLsolve.anderson(df, [ 1.; 1.], 0.1, 0.1, 100, false, false, false, 0.9, 1, 0, ac)