From db2f47ca96bba668d86bf6502e4961d777a2013d Mon Sep 17 00:00:00 2001
From: erantreister <erantreister@gmail.com>
Date: Tue, 30 Jul 2019 23:27:27 +0300
Subject: [PATCH] Addition of blockFGMRES + testing. Previously was in
 multigrid package.

---
 Project.toml            |   2 +-
 src/KrylovMethods.jl    |   1 +
 src/blockFGMRES.jl      | 179 ++++++++++++++++++++++++++++++++++++++++
 src/fgmres.jl           |  24 +++---
 test/runtests.jl        |   1 +
 test/testBlockFGMRES.jl |  59 +++++++++++++
 test/testFGMRES.jl      |   8 +-
 7 files changed, 258 insertions(+), 16 deletions(-)
 create mode 100644 src/blockFGMRES.jl
 create mode 100644 test/testBlockFGMRES.jl

diff --git a/Project.toml b/Project.toml
index 1aa8e21..99686b0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "KrylovMethods"
 uuid = "9a2cd570-f05c-5dc1-9209-93ad6f5727f7"
-version = "0.5.0"
+version = "0.6.0"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/KrylovMethods.jl b/src/KrylovMethods.jl
index d15f984..818ad5e 100644
--- a/src/KrylovMethods.jl
+++ b/src/KrylovMethods.jl
@@ -13,6 +13,7 @@ module KrylovMethods
 	include("blockBiCGSTB.jl")
 	include("gmres.jl")	
 	include("fgmres.jl")	
+	include("blockFGMRES.jl")	
 	include("lanczosBidiag.jl")
 	include("ssor.jl")	
 	include("lsqr.jl")	
diff --git a/src/blockFGMRES.jl b/src/blockFGMRES.jl
new file mode 100644
index 0000000..05a3ac9
--- /dev/null
+++ b/src/blockFGMRES.jl
@@ -0,0 +1,179 @@
+
+import Base.isempty
+using LinearAlgebra
+export blockFGMRES
+
+function blockFGMRES(A::SparseMatrixCSC{T1,Int},b::Array{T2,2},restrt::Int; kwargs...) where {T1,T2}
+	Ax = zeros(promote_type(T1,T2),size(b))
+	return blockFGMRES(X -> mul!(Ax,A,X,1.0,0.0),b,restrt;kwargs...)
+end
+
+blockFGMRES(A,b::Array{Any,2},restrt;kwargs...) = blockFGMRES(x -> A*x ,b,restrt;kwargs...) 
+
+
+"""
+x,flag,err,iter,resvec = blockFGMRES(A,b,restrt,tol=1e-2,maxIter=100,M=1,x=[],out=0)
+
+Block (flexible) Generalized Minimal residual ( (F)GMRES(m) ) method with restarts applied to A*x = b.
+
+This is the "right preconditioning" version of blockGMRES: A*M^{-1}Mx = b. 
+
+Input:
+
+A       - function computing A*x
+b       - right hand side matrix (set of vectors)
+restrt  - number of iterations between restarts
+tol     - error tolerance
+maxIter - maximum number of iterations
+M       - preconditioner, function computing M\\x
+x       - starting guess
+out     - flag for output (0 : only errors, 1 : final status, 2: error at each iteration)
+
+Output:
+
+x       - approximate solution
+flag    - exit flag (  0 : desired tolerance achieved,
+	-1 : maxIter reached without converging
+	-9 : right hand side was zero)
+	err     - norm of relative residual, i.e., norm(A*x-b)/norm(b)
+	iter    - number of iterations
+	resvec  - norm of relative residual at each iteration
+	
+	preconditioner M(r) must return a copy of a matrix. Cannot reuse memory of r.
+	"""
+function blockFGMRES(A::Function,B::Array,restrt::Int; tol::Real=1e-2,maxIter::Int=100,M::Function=t->copy(t),X::Array=[],out::Int=0,flexible::Bool=false,mem::FGMRESmem =  getEmptyFGMRESmem())
+  # initialization
+  n  = size(B,1)
+  m  = size(B,2)
+  TYPE = eltype(B)
+  mem = checkMemorySize(mem,n,restrt,TYPE,flexible,m);
+
+  if norm(B)==0.0 
+	return zeros(TYPE,n,m),-9,0.0,0,[0.0]; 
+  end
+	
+  if Base.isempty(X)
+    X = zeros(eltype(B),n,m)
+	R = copy(B);
+  elseif norm(X) < eps(real(TYPE))
+	R = copy(B);
+  else
+	R = copy(B);
+	R.-=A(X);
+  end
+  
+  if eltype(B) <: Complex 
+       X = complex(X)
+   end
+  if issparse(X)
+	error("X is sparse");
+  end
+  
+  rnorm0 = norm(B);
+
+  err = norm(R)/rnorm0
+  if err < tol; return X, err; end
+
+  constOne = one(TYPE);
+  constZero = zero(TYPE);
+
+  restrt = min(restrt,n-1)
+  Zbig = mem.Z;
+  Vbig = mem.V;
+  H = zeros(TYPE,(restrt+1)*m,restrt*m);
+  xi = zeros(TYPE,(restrt+1)*m,m);
+  T = zeros(TYPE,restrt*m,m);
+ 
+  W = zeros(TYPE,0);
+  Z = zeros(TYPE,0);
+  
+  resvec = zeros(restrt*maxIter)
+  if out==2
+    println(@sprintf("=== blockFGMRES ===\n%4s\t%7s\n","iter","relres"))
+  end
+
+  
+  flag = -1
+	counter = 0
+	iter = 0
+	while iter < maxIter
+		iter+=1;
+		Q = qr!(R); 
+		Betta = Q.R; 
+		R[:] = Matrix(Q.Q); # this code is equivalent to (W,Betta) = qr(r);
+		xi[1:m,:] = Betta;
+		#BLAS.scal!(n,(1/betta)*constOne,r,1); # w = w./betta
+		H[:] .= 0.0;
+		T[:] .= 0.0;
+		
+		if out==2;; print(@sprintf("%3d\t", iter));end
+		
+		for j = 1:restrt
+			colSet_j = (((j-1)*m)+1):j*m
+			if j==1
+				Vbig[:,colSet_j] = R;  # no memory problem with this line....
+				Z = M(R); 
+			else
+				Vbig[:,colSet_j] = W;  # no memory problem with this line....
+				Z = M(W);
+			end
+			if flexible
+				Zbig[:,colSet_j] = Z;
+			end
+			W = A(Z);
+			counter += 1;
+			# Gram Schmidt (much faster than MGS even though zeros are multiplied, does relatively well):
+			BLAS.gemm!('C','N', constOne, Vbig, W,constZero,T); # t = V'*w;
+			T[(j*m+1):end,:] .= 0.0;
+			H[1:(restrt*m),colSet_j] = T;
+			BLAS.gemm!('N','N', -constOne, Vbig, T,constOne,W); # w = w - V*t  
+			Q = qr!(W); Betta = Q.R; W[:] = Matrix(Q.Q); # this code is equivalent to (W,Betta) = qr(W);
+			H[((j*m)+1):(j+1)*m,colSet_j] = Betta;
+			y = H[1:(j+1)*m,1:j*m]\xi[1:(j+1)*m,:];
+			err = norm(H[1:(j+1)*m,1:j*m]*y - xi[1:(j+1)*m,:])/rnorm0
+		
+			if out==2 print(@sprintf("%1.1e ", err)); end
+			resvec[counter] = err;
+			if err <= tol
+				if flexible
+					Zbig[:,(j*m+1):end] .= 0.0;
+				else
+					Vbig[:,(j*m+1):end] .= 0.0;
+				end
+				if out==2; print("\n"); end
+				flag = 0; break
+			end
+		end # end for j to restrt
+		
+		y = pinv(H)*xi;
+		
+		if flexible
+			BLAS.gemm!('N','N', constOne, Zbig, y,constZero,W); # w = Z*y  #This is the correction that corresponds to the residual.
+		else
+			# W = Vbig*y;
+			BLAS.gemm!('N','N', constOne, Vbig, y, constZero, W);
+			Z = M(W);
+			W[:] = Z;
+		end
+		X .+= W;
+		
+		if out==2; print("\n"); end
+		if err <= tol
+			flag = 0;
+			break
+		end
+		if iter < maxIter
+			R[:] = B;
+			W = A(X);
+			R .-= W;
+		end
+	end #for iter to maxiter
+	if out>=0
+		if flag==-1
+			println(@sprintf("blockFGMRES iterated maxIter (=%d) outer iterations without achieving the desired tolerance. Acheived: %1.2e",maxIter,err))
+		elseif flag==0 && out>=1
+			println(@sprintf("blockFGMRES achieved desired tolerance at inner iteration %d. Residual norm is %1.2e.",counter,resvec[counter]))
+		end
+	end
+	return X,flag,resvec[counter],iter,resvec[1:counter]
+end #blockFGMRES
diff --git a/src/fgmres.jl b/src/fgmres.jl
index 753d7a8..d68a8b3 100644
--- a/src/fgmres.jl
+++ b/src/fgmres.jl
@@ -15,11 +15,11 @@ mutable struct FGMRESmem
 	Z				::Array
 end
 
-function getFGMRESmem(n::Int,flexible::Bool,T::Type,k::Int)
+function getFGMRESmem(n::Int,flexible::Bool,T::Type,k::Int,nrhs::Int=1)
 	if flexible
-		return FGMRESmem(zeros(T,n,k),zeros(T,n,k));
+		return FGMRESmem(zeros(T,n,k*nrhs),zeros(T,n,k*nrhs));
 	else
-		return FGMRESmem(zeros(T,n,k),zeros(T,0));
+		return FGMRESmem(zeros(T,n,k*nrhs),zeros(T,0));
 	end
 end
 
@@ -31,14 +31,14 @@ function isempty(mem::FGMRESmem)
 	return size(mem.V,1)==0;
 end
 
-function checkMemorySize(mem::FGMRESmem,n::Int,k::Int,TYPE::Type,flexible::Bool)
+function checkMemorySize(mem::FGMRESmem,n::Int,k::Int,TYPE::Type,flexible::Bool,nrhs::Int=1)
 	if isempty(mem)
 		# warn("Allocating memory in FGMRES")
-		mem = getFGMRESmem(n,flexible,TYPE,k);
+		mem = getFGMRESmem(n,flexible,TYPE,k,nrhs);
 		return mem
 	else
-		if size(mem.V,2)!= k
-			error("FGMRES: size of Krylov subspace is different than inner");
+		if size(mem.V,2)!= k*nrhs
+			error("FGMRES: size of Krylov subspace is different than inner*nrhs");
 		end
 	end
 end
@@ -71,7 +71,7 @@ flag    - exit flag (  0 : desired tolerance achieved,
 	iter    - number of iterations
 	resvec  - norm of relative residual at each iteration
 	
-	preconditioner M(r) must return a copy of a vector. Cannot reuse memory of 
+	preconditioner M(r) must return a copy of a vector. Cannot reuse memory of r.
 	"""
 function fgmres(A::Function,b::Vector,restrt::Int; tol::Real=1e-2,maxIter::Int=100,M::Function=t->copy(t),x::Vector=[],out::Int=0,storeInterm::Bool=false,flexible::Bool=false,mem::FGMRESmem =  getEmptyFGMRESmem())
   # initialization
@@ -91,7 +91,8 @@ function fgmres(A::Function,b::Vector,restrt::Int; tol::Real=1e-2,maxIter::Int=1
   elseif norm(x) < eps(real(TYPE))
 	r = copy(b);
   else
-	r = b-A(x);
+	r = copy(b);
+	r.-=A(x);
   end
   
   if eltype(b) <: Complex 
@@ -214,8 +215,9 @@ function fgmres(A::Function,b::Vector,restrt::Int; tol::Real=1e-2,maxIter::Int=1
 			flag = 0;
 			break
 		end
-		if maxIter > 1
-			r = b - A(x);
+		if iter < maxIter
+			r = copy(b);
+			r.-=A(x);
 			betta = norm(r)
 		end
 		# if out==2; print(@sprintf("\t %1.1e\n", err)); end
diff --git a/test/runtests.jl b/test/runtests.jl
index 3176c17..7979f64 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,6 +17,7 @@ include("testCGLS.jl")
 include("testGS.jl")
 include("testGMRES.jl")
 include("testFGMRES.jl")
+include("testBlockFGMRES.jl")
 include("testLANCZOS.jl")
 include("testSSOR.jl")
 include("testLSQR.jl")
diff --git a/test/testBlockFGMRES.jl b/test/testBlockFGMRES.jl
new file mode 100644
index 0000000..3b55182
--- /dev/null
+++ b/test/testBlockFGMRES.jl
@@ -0,0 +1,59 @@
+
+println("*******************************************************")
+@testset "blockFGMRES" begin
+	@testset "real matrix" begin
+		n = 50;
+		A  = sprandn(n,n,.1) + SparseMatrixCSC(10.0I, n, n)
+		m  = 2; 
+		D  = Vector(diag(A))
+		M2 = x -> D.\x
+		rhs = randn(n,m)
+		tol = 1e-6;
+		
+		# test printing and behaviour for early stopping
+		xtt = blockFGMRES(A,rhs ,3,tol=1e-12,maxIter=3,out=2)
+		@test xtt[2]==-1
+		
+		# test behaviour for zero rhs
+		xtt = blockFGMRES(A,0*rhs,3,tol=tol,maxIter=10,out=2)
+		@test xtt[2]==-9
+		@test all(xtt[1].==0.0)
+		@test length(xtt[1])==m*n
+		@test eltype(xtt[1])==eltype(rhs)
+
+		println("-----------------------------------")
+		x1 = blockFGMRES(A,rhs ,5,tol=tol,maxIter=100,out=1)
+		x3 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,X=randn(size(rhs)),flexible=true)
+		x4 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,M=M2,flexible = true)
+		
+		@test norm(A*x1[1]-rhs)/norm(rhs) < tol
+		@test norm(A*x3[1]-rhs)/norm(rhs) < tol
+		@test norm(A*x4[1]-rhs)/norm(rhs) < tol
+		@test norm(x3[1]-x1[1])/norm(x1[1]) < 1e-5
+	end
+	println("*****************************************************************************")
+	@testset "complex matrix" begin
+		A  = sprandn(100,100,.1) + SparseMatrixCSC(10.0I, 100, 100) + 1im*(sprandn(100,100,.1) + SparseMatrixCSC(10.0I, 100, 100) )
+		m  = 3; 
+		D  = Vector(diag(A))
+		M3 = x -> x./D
+		rhs = complex(randn(100,m))
+		tol = 1e-6;
+		
+		# test behaviour for zero rhs
+		xtt = blockFGMRES(A,0.0*rhs,5,tol=tol,maxIter=10,out=2)
+		@test xtt[2]==-9
+		@test all(xtt[1].==0)
+		@test length(xtt[1])==100*m
+		@test eltype(xtt[1])==eltype(rhs)
+		
+		
+		x1 = blockFGMRES(A,rhs ,5,tol=tol,maxIter=100)
+		x3 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,X=randn(size(rhs)),flexible = true)
+		x4 = blockFGMRES(A,rhs,5,tol=tol,maxIter=100,M=M3,flexible = true)
+		
+		@test norm(A*x1[1]-rhs)/norm(rhs) < tol
+		@test norm(A*x3[1]-rhs)/norm(rhs) < tol
+		@test norm(x3[1]-x1[1])/norm(x1[1]) < 1e-5
+	end
+end
\ No newline at end of file
diff --git a/test/testFGMRES.jl b/test/testFGMRES.jl
index ed76bb1..c982ae7 100644
--- a/test/testFGMRES.jl
+++ b/test/testFGMRES.jl
@@ -5,12 +5,12 @@ println("*******************************************************")
 		A  = sprandn(100,100,.1) + SparseMatrixCSC(10.0I, 100, 100)
 		n  = size(A,2)
 		D  = diag(A)
-		M2 = x -> D.\x
+		M2 = x -> Vector(D.\x)
 		rhs = randn(100)
 		tol = 1e-6;
 		
 		# test printing and behaviour for early stopping
-		xtt = fgmres(A,rhs ,3,tol=1e-10,maxIter=3,out=2,storeInterm=true)
+		xtt = fgmres(A,rhs ,3,tol=1e-12,maxIter=3,out=2,storeInterm=true)
 		@test xtt[2]==-1
 		
 		# test behaviour for zero rhs
@@ -22,8 +22,8 @@ println("*******************************************************")
 
 		
 		x1 = fgmres(A,rhs ,5,tol=tol,maxIter=100,out=1)
-		x3 = fgmres(A,rhs,5,tol=tol,maxIter=100,x=randn(size(rhs)))
-		x4 = fgmres(A,rhs,5,tol=tol,maxIter=100,M=M2)
+		x3 = fgmres(A,rhs,5,tol=tol,maxIter=100,x=randn(size(rhs)),flexible = true)
+		x4 = fgmres(A,rhs,5,tol=tol,maxIter=100,M=M2,flexible = true)
 		
 		@test norm(A*x1[1]-rhs)/norm(rhs) < tol
 		@test norm(A*x3[1]-rhs)/norm(rhs) < tol