From db2f47ca96bba668d86bf6502e4961d777a2013d Mon Sep 17 00:00:00 2001 From: erantreister Date: Tue, 30 Jul 2019 23:27:27 +0300 Subject: [PATCH] Addition of blockFGMRES + testing. Previously was in multigrid package. --- Project.toml | 2 +- src/KrylovMethods.jl | 1 + src/blockFGMRES.jl | 179 ++++++++++++++++++++++++++++++++++++++++ src/fgmres.jl | 24 +++--- test/runtests.jl | 1 + test/testBlockFGMRES.jl | 59 +++++++++++++ test/testFGMRES.jl | 8 +- 7 files changed, 258 insertions(+), 16 deletions(-) create mode 100644 src/blockFGMRES.jl create mode 100644 test/testBlockFGMRES.jl diff --git a/Project.toml b/Project.toml index 1aa8e21..99686b0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "KrylovMethods" uuid = "9a2cd570-f05c-5dc1-9209-93ad6f5727f7" -version = "0.5.0" +version = "0.6.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/KrylovMethods.jl b/src/KrylovMethods.jl index d15f984..818ad5e 100644 --- a/src/KrylovMethods.jl +++ b/src/KrylovMethods.jl @@ -13,6 +13,7 @@ module KrylovMethods include("blockBiCGSTB.jl") include("gmres.jl") include("fgmres.jl") + include("blockFGMRES.jl") include("lanczosBidiag.jl") include("ssor.jl") include("lsqr.jl") diff --git a/src/blockFGMRES.jl b/src/blockFGMRES.jl new file mode 100644 index 0000000..05a3ac9 --- /dev/null +++ b/src/blockFGMRES.jl @@ -0,0 +1,179 @@ + +import Base.isempty +using LinearAlgebra +export blockFGMRES + +function blockFGMRES(A::SparseMatrixCSC{T1,Int},b::Array{T2,2},restrt::Int; kwargs...) where {T1,T2} + Ax = zeros(promote_type(T1,T2),size(b)) + return blockFGMRES(X -> mul!(Ax,A,X,1.0,0.0),b,restrt;kwargs...) +end + +blockFGMRES(A,b::Array{Any,2},restrt;kwargs...) = blockFGMRES(x -> A*x ,b,restrt;kwargs...) + + +""" +x,flag,err,iter,resvec = blockFGMRES(A,b,restrt,tol=1e-2,maxIter=100,M=1,x=[],out=0) + +Block (flexible) Generalized Minimal residual ( (F)GMRES(m) ) method with restarts applied to A*x = b. + +This is the "right preconditioning" version of blockGMRES: A*M^{-1}Mx = b. + +Input: + +A - function computing A*x +b - right hand side matrix (set of vectors) +restrt - number of iterations between restarts +tol - error tolerance +maxIter - maximum number of iterations +M - preconditioner, function computing M\\x +x - starting guess +out - flag for output (0 : only errors, 1 : final status, 2: error at each iteration) + +Output: + +x - approximate solution +flag - exit flag ( 0 : desired tolerance achieved, + -1 : maxIter reached without converging + -9 : right hand side was zero) + err - norm of relative residual, i.e., norm(A*x-b)/norm(b) + iter - number of iterations + resvec - norm of relative residual at each iteration + + preconditioner M(r) must return a copy of a matrix. Cannot reuse memory of r. + """ +function blockFGMRES(A::Function,B::Array,restrt::Int; tol::Real=1e-2,maxIter::Int=100,M::Function=t->copy(t),X::Array=[],out::Int=0,flexible::Bool=false,mem::FGMRESmem = getEmptyFGMRESmem()) + # initialization + n = size(B,1) + m = size(B,2) + TYPE = eltype(B) + mem = checkMemorySize(mem,n,restrt,TYPE,flexible,m); + + if norm(B)==0.0 + return zeros(TYPE,n,m),-9,0.0,0,[0.0]; + end + + if Base.isempty(X) + X = zeros(eltype(B),n,m) + R = copy(B); + elseif norm(X) < eps(real(TYPE)) + R = copy(B); + else + R = copy(B); + R.-=A(X); + end + + if eltype(B) <: Complex + X = complex(X) + end + if issparse(X) + error("X is sparse"); + end + + rnorm0 = norm(B); + + err = norm(R)/rnorm0 + if err < tol; return X, err; end + + constOne = one(TYPE); + constZero = zero(TYPE); + + restrt = min(restrt,n-1) + Zbig = mem.Z; + Vbig = mem.V; + H = zeros(TYPE,(restrt+1)*m,restrt*m); + xi = zeros(TYPE,(restrt+1)*m,m); + T = zeros(TYPE,restrt*m,m); + + W = zeros(TYPE,0); + Z = zeros(TYPE,0); + + resvec = zeros(restrt*maxIter) + if out==2 + println(@sprintf("=== blockFGMRES ===\n%4s\t%7s\n","iter","relres")) + end + + + flag = -1 + counter = 0 + iter = 0 + while iter < maxIter + iter+=1; + Q = qr!(R); + Betta = Q.R; + R[:] = Matrix(Q.Q); # this code is equivalent to (W,Betta) = qr(r); + xi[1:m,:] = Betta; + #BLAS.scal!(n,(1/betta)*constOne,r,1); # w = w./betta + H[:] .= 0.0; + T[:] .= 0.0; + + if out==2;; print(@sprintf("%3d\t", iter));end + + for j = 1:restrt + colSet_j = (((j-1)*m)+1):j*m + if j==1 + Vbig[:,colSet_j] = R; # no memory problem with this line.... + Z = M(R); + else + Vbig[:,colSet_j] = W; # no memory problem with this line.... + Z = M(W); + end + if flexible + Zbig[:,colSet_j] = Z; + end + W = A(Z); + counter += 1; + # Gram Schmidt (much faster than MGS even though zeros are multiplied, does relatively well): + BLAS.gemm!('C','N', constOne, Vbig, W,constZero,T); # t = V'*w; + T[(j*m+1):end,:] .= 0.0; + H[1:(restrt*m),colSet_j] = T; + BLAS.gemm!('N','N', -constOne, Vbig, T,constOne,W); # w = w - V*t + Q = qr!(W); Betta = Q.R; W[:] = Matrix(Q.Q); # this code is equivalent to (W,Betta) = qr(W); + H[((j*m)+1):(j+1)*m,colSet_j] = Betta; + y = H[1:(j+1)*m,1:j*m]\xi[1:(j+1)*m,:]; + err = norm(H[1:(j+1)*m,1:j*m]*y - xi[1:(j+1)*m,:])/rnorm0 + + if out==2 print(@sprintf("%1.1e ", err)); end + resvec[counter] = err; + if err <= tol + if flexible + Zbig[:,(j*m+1):end] .= 0.0; + else + Vbig[:,(j*m+1):end] .= 0.0; + end + if out==2; print("\n"); end + flag = 0; break + end + end # end for j to restrt + + y = pinv(H)*xi; + + if flexible + BLAS.gemm!('N','N', constOne, Zbig, y,constZero,W); # w = Z*y #This is the correction that corresponds to the residual. + else + # W = Vbig*y; + BLAS.gemm!('N','N', constOne, Vbig, y, constZero, W); + Z = M(W); + W[:] = Z; + end + X .+= W; + + if out==2; print("\n"); end + if err <= tol + flag = 0; + break + end + if iter < maxIter + R[:] = B; + W = A(X); + R .-= W; + end + end #for iter to maxiter + if out>=0 + if flag==-1 + println(@sprintf("blockFGMRES iterated maxIter (=%d) outer iterations without achieving the desired tolerance. Acheived: %1.2e",maxIter,err)) + elseif flag==0 && out>=1 + println(@sprintf("blockFGMRES achieved desired tolerance at inner iteration %d. Residual norm is %1.2e.",counter,resvec[counter])) + end + end + return X,flag,resvec[counter],iter,resvec[1:counter] +end #blockFGMRES diff --git a/src/fgmres.jl b/src/fgmres.jl index 753d7a8..d68a8b3 100644 --- a/src/fgmres.jl +++ b/src/fgmres.jl @@ -15,11 +15,11 @@ mutable struct FGMRESmem Z ::Array end -function getFGMRESmem(n::Int,flexible::Bool,T::Type,k::Int) +function getFGMRESmem(n::Int,flexible::Bool,T::Type,k::Int,nrhs::Int=1) if flexible - return FGMRESmem(zeros(T,n,k),zeros(T,n,k)); + return FGMRESmem(zeros(T,n,k*nrhs),zeros(T,n,k*nrhs)); else - return FGMRESmem(zeros(T,n,k),zeros(T,0)); + return FGMRESmem(zeros(T,n,k*nrhs),zeros(T,0)); end end @@ -31,14 +31,14 @@ function isempty(mem::FGMRESmem) return size(mem.V,1)==0; end -function checkMemorySize(mem::FGMRESmem,n::Int,k::Int,TYPE::Type,flexible::Bool) +function checkMemorySize(mem::FGMRESmem,n::Int,k::Int,TYPE::Type,flexible::Bool,nrhs::Int=1) if isempty(mem) # warn("Allocating memory in FGMRES") - mem = getFGMRESmem(n,flexible,TYPE,k); + mem = getFGMRESmem(n,flexible,TYPE,k,nrhs); return mem else - if size(mem.V,2)!= k - error("FGMRES: size of Krylov subspace is different than inner"); + if size(mem.V,2)!= k*nrhs + error("FGMRES: size of Krylov subspace is different than inner*nrhs"); end end end @@ -71,7 +71,7 @@ flag - exit flag ( 0 : desired tolerance achieved, iter - number of iterations resvec - norm of relative residual at each iteration - preconditioner M(r) must return a copy of a vector. Cannot reuse memory of + preconditioner M(r) must return a copy of a vector. Cannot reuse memory of r. """ function fgmres(A::Function,b::Vector,restrt::Int; tol::Real=1e-2,maxIter::Int=100,M::Function=t->copy(t),x::Vector=[],out::Int=0,storeInterm::Bool=false,flexible::Bool=false,mem::FGMRESmem = getEmptyFGMRESmem()) # initialization @@ -91,7 +91,8 @@ function fgmres(A::Function,b::Vector,restrt::Int; tol::Real=1e-2,maxIter::Int=1 elseif norm(x) < eps(real(TYPE)) r = copy(b); else - r = b-A(x); + r = copy(b); + r.-=A(x); end if eltype(b) <: Complex @@ -214,8 +215,9 @@ function fgmres(A::Function,b::Vector,restrt::Int; tol::Real=1e-2,maxIter::Int=1 flag = 0; break end - if maxIter > 1 - r = b - A(x); + if iter < maxIter + r = copy(b); + r.-=A(x); betta = norm(r) end # if out==2; print(@sprintf("\t %1.1e\n", err)); end diff --git a/test/runtests.jl b/test/runtests.jl index 3176c17..7979f64 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,7 @@ include("testCGLS.jl") include("testGS.jl") include("testGMRES.jl") include("testFGMRES.jl") +include("testBlockFGMRES.jl") include("testLANCZOS.jl") include("testSSOR.jl") include("testLSQR.jl") diff --git a/test/testBlockFGMRES.jl b/test/testBlockFGMRES.jl new file mode 100644 index 0000000..3b55182 --- /dev/null +++ b/test/testBlockFGMRES.jl @@ -0,0 +1,59 @@ + +println("*******************************************************") +@testset "blockFGMRES" begin + @testset "real matrix" begin + n = 50; + A = sprandn(n,n,.1) + SparseMatrixCSC(10.0I, n, n) + m = 2; + D = Vector(diag(A)) + M2 = x -> D.\x + rhs = randn(n,m) + tol = 1e-6; + + # test printing and behaviour for early stopping + xtt = blockFGMRES(A,rhs ,3,tol=1e-12,maxIter=3,out=2) + @test xtt[2]==-1 + + # test behaviour for zero rhs + xtt = blockFGMRES(A,0*rhs,3,tol=tol,maxIter=10,out=2) + @test xtt[2]==-9 + @test all(xtt[1].==0.0) + @test length(xtt[1])==m*n + @test eltype(xtt[1])==eltype(rhs) + + println("-----------------------------------") + x1 = blockFGMRES(A,rhs ,5,tol=tol,maxIter=100,out=1) + x3 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,X=randn(size(rhs)),flexible=true) + x4 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,M=M2,flexible = true) + + @test norm(A*x1[1]-rhs)/norm(rhs) < tol + @test norm(A*x3[1]-rhs)/norm(rhs) < tol + @test norm(A*x4[1]-rhs)/norm(rhs) < tol + @test norm(x3[1]-x1[1])/norm(x1[1]) < 1e-5 + end + println("*****************************************************************************") + @testset "complex matrix" begin + A = sprandn(100,100,.1) + SparseMatrixCSC(10.0I, 100, 100) + 1im*(sprandn(100,100,.1) + SparseMatrixCSC(10.0I, 100, 100) ) + m = 3; + D = Vector(diag(A)) + M3 = x -> x./D + rhs = complex(randn(100,m)) + tol = 1e-6; + + # test behaviour for zero rhs + xtt = blockFGMRES(A,0.0*rhs,5,tol=tol,maxIter=10,out=2) + @test xtt[2]==-9 + @test all(xtt[1].==0) + @test length(xtt[1])==100*m + @test eltype(xtt[1])==eltype(rhs) + + + x1 = blockFGMRES(A,rhs ,5,tol=tol,maxIter=100) + x3 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,X=randn(size(rhs)),flexible = true) + x4 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,M=M3,flexible = true) + + @test norm(A*x1[1]-rhs)/norm(rhs) < tol + @test norm(A*x3[1]-rhs)/norm(rhs) < tol + @test norm(x3[1]-x1[1])/norm(x1[1]) < 1e-5 + end +end \ No newline at end of file diff --git a/test/testFGMRES.jl b/test/testFGMRES.jl index ed76bb1..c982ae7 100644 --- a/test/testFGMRES.jl +++ b/test/testFGMRES.jl @@ -5,12 +5,12 @@ println("*******************************************************") A = sprandn(100,100,.1) + SparseMatrixCSC(10.0I, 100, 100) n = size(A,2) D = diag(A) - M2 = x -> D.\x + M2 = x -> Vector(D.\x) rhs = randn(100) tol = 1e-6; # test printing and behaviour for early stopping - xtt = fgmres(A,rhs ,3,tol=1e-10,maxIter=3,out=2,storeInterm=true) + xtt = fgmres(A,rhs ,3,tol=1e-12,maxIter=3,out=2,storeInterm=true) @test xtt[2]==-1 # test behaviour for zero rhs @@ -22,8 +22,8 @@ println("*******************************************************") x1 = fgmres(A,rhs ,5,tol=tol,maxIter=100,out=1) - x3 = fgmres(A,rhs,5,tol=tol,maxIter=100,x=randn(size(rhs))) - x4 = fgmres(A,rhs,5,tol=tol,maxIter=100,M=M2) + x3 = fgmres(A,rhs,5,tol=tol,maxIter=100,x=randn(size(rhs)),flexible = true) + x4 = fgmres(A,rhs,5,tol=tol,maxIter=100,M=M2,flexible = true) @test norm(A*x1[1]-rhs)/norm(rhs) < tol @test norm(A*x3[1]-rhs)/norm(rhs) < tol