Merge 4fac168 into 1bb2a3a

SciML · May 18, 2022 · c13c4ea · c13c4ea
2 parents 1bb2a3a + 4fac168
commit c13c4ea
Show file tree

Hide file tree

Showing 16 changed files with 245 additions and 196 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -1,5 +1,5 @@
 steps:
-  - label: "GPU"
+  - label: "LinearSolveCUDA"
     plugins:
       - JuliaCI/julia#v1:
           version: "1"
@@ -9,7 +9,7 @@ steps:
       queue: "juliagpu"
       cuda: "*"
     env:
-      GROUP: 'GPU'
+      GROUP: 'LinearSolveCUDA'
       JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
       # SECRET_CODECOV_TOKEN: "..."
     timeout_in_minutes: 30

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -13,6 +13,7 @@ jobs:
       matrix:
         group:
           - All
+          - LinearSolvePardiso
         version:
           - '1'
           - '1.6'

diff --git a/Project.toml b/Project.toml
@@ -6,14 +6,14 @@ version = "1.15.0"
 [deps]
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 KLU = "ef3ab10e-7fda-4108-b977-705223b18434"
 Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
-Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
@@ -23,24 +23,23 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
 [compat]
 ArrayInterface = "3, 4, 5"
 DocStringExtensions = "0.8"
+GPUArrays = "8"
 IterativeSolvers = "0.9.2"
 KLU = "0.3.0"
 Krylov = "0.7.11, 0.8"
 KrylovKit = "0.5"
 RecursiveFactorization = "0.2.8"
 Reexport = "1"
-Requires = "1"
 SciMLBase = "1.25"
 Setfield = "0.7, 0.8"
 UnPack = "1"
 julia = "1.6"
 
 [extras]
-Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Pardiso", "Pkg", "Random", "SafeTestsets"]
+test = ["Test", "Pkg", "Random", "SafeTestsets"]
diff --git a/docs/src/solvers/solvers.md b/docs/src/solvers/solvers.md
@@ -77,6 +77,12 @@ customized per-package, details given below describe a subset of important array
       factorization does not use pivoting during the numerical factorization and therefore the
       procedure can fail even for invertible matrices.
 
+### LinearSolve.jl
+
+LinearSolve.jl contains some linear solvers built in.
+
+- `SimpleLUFactorization`: a simple LU-factorization implementation without BLAS. Fast for small matrices.
+
 ### SuiteSparse.jl
 
 By default, the SuiteSparse.jl are implemented for efficiency by caching the
@@ -92,8 +98,11 @@ be used in a context where that assumption does not hold, set `reuse_symbolic=fa
 
 ### Pardiso.jl
 
-This package is not loaded by default. Thus in order to use this package you
-must first `using Pardiso`. The following algorithms are pre-specified:
+!!! note
+
+    Using this solver requires adding the package LinearSolvePardiso.jl
+
+The following algorithms are pre-specified:
 
 - `MKLPardisoFactorize(;kwargs...)`: A sparse factorization method.
 - `MKLPardisoIterate(;kwargs...)`: A mixed factorization+iterative method.
@@ -128,7 +137,11 @@ end
 Note that `CuArrays` are supported by `GenericFactorization` in the "normal" way.
 The following are non-standard GPU factorization routines.
 
-- `GPUOffloadFactorization()`: An offloading technique used to GPU-accelerate CPU-based
+!!! note
+
+    Using this solver requires adding the package LinearSolveCUDA.jl
+
+- `CudaOffloadFactorization()`: An offloading technique used to GPU-accelerate CPU-based
   computations. Requires a sufficiently large `A` to overcome the data transfer
   costs.
 

diff --git a/lib/LinearSolveCUDA/Project.toml b/lib/LinearSolveCUDA/Project.toml
@@ -0,0 +1,22 @@
+name = "LinearSolveCUDA"
+uuid = "0d26bed2-170e-468a-8415-1cfcbba6e180"
+authors = ["SciML"]
+version = "0.1"
+
+[deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
+
+[compat]
+CUDA = "3"
+SciMLBase = "1.25"
+LinearSolve = "1"
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/lib/LinearSolveCUDA/src/LinearSolveCUDA.jl b/lib/LinearSolveCUDA/src/LinearSolveCUDA.jl
@@ -0,0 +1,31 @@
+module LinearSolveCUDA
+
+using CUDA, LinearAlgebra, LinearSolve, SciMLBase
+
+struct CudaOffloadFactorization <: LinearSolve.AbstractFactorization end
+
+function SciMLBase.solve(cache::LinearSolve.LinearCache, alg::CudaOffloadFactorization; kwargs...)
+    if cache.isfresh
+        fact = LinearSolve.do_factorization(alg, CUDA.CuArray(cache.A), cache.b, cache.u)
+        cache = LinearSolve.set_cacheval(cache, fact)
+    end
+
+    copyto!(cache.u, cache.b)
+    y = Array(ldiv!(cache.cacheval, CUDA.CuArray(cache.u)))
+    SciMLBase.build_linear_solution(alg, y, nothing, cache)
+end
+
+function LinearSolve.do_factorization(alg::CudaOffloadFactorization, A, b, u)
+    A isa Union{AbstractMatrix,AbstractDiffEqOperator} ||
+        error("LU is not defined for $(typeof(A))")
+
+    if A isa AbstractDiffEqOperator
+        A = A.A
+    end
+    fact = qr(CUDA.CuArray(A))
+    return fact
+end
+
+export CudaOffloadFactorization
+
+end
diff --git a/test/cuda.jl → lib/LinearSolveCUDA/test/runtests.jl b/test/cuda.jl → lib/LinearSolveCUDA/test/runtests.jl
@@ -1,42 +1,41 @@
-using LinearSolve, LinearAlgebra, SparseArrays
-using CUDA
-using Test
-
-n = 8
-A = Matrix(I,n,n)
-b = ones(n)
-A1 = A/1; b1 = rand(n); x1 = zero(b)
-A2 = A/2; b2 = rand(n); x2 = zero(b)
-
-prob1 = LinearProblem(A1, b1; u0=x1)
-prob2 = LinearProblem(A2, b2; u0=x2)
-
-cache_kwargs = (;verbose=true, abstol=1e-8, reltol=1e-8, maxiter=30,)
-
-function test_interface(alg, prob1, prob2)
-    A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
-    A2 = prob2.A; b2 = prob2.b; x2 = prob2.u0
-
-    y = solve(prob1, alg; cache_kwargs...)
-    @test A1 *  y  ≈ b1
-
-    cache = SciMLBase.init(prob1,alg; cache_kwargs...) # initialize cache
-    y = solve(cache)
-    @test A1 *  y  ≈ b1
-
-    cache = LinearSolve.set_A(cache,copy(A2))
-    y = solve(cache)
-    @test A2 *  y  ≈ b1
-
-    cache = LinearSolve.set_b(cache,b2)
-    y = solve(cache)
-    @test A2 *  y  ≈ b2
-
-    return
-end
-
-test_interface(GPUOffloadFactorization(), prob1, prob2)
-
-A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
-y = solve(prob1)
-@test A1 *  y  ≈ b1
+using LinearSolve, LinearSolveCUDA, LinearAlgebra, SparseArrays
+using Test
+
+n = 8
+A = Matrix(I,n,n)
+b = ones(n)
+A1 = A/1; b1 = rand(n); x1 = zero(b)
+A2 = A/2; b2 = rand(n); x2 = zero(b)
+
+prob1 = LinearProblem(A1, b1; u0=x1)
+prob2 = LinearProblem(A2, b2; u0=x2)
+
+cache_kwargs = (;verbose=true, abstol=1e-8, reltol=1e-8, maxiter=30,)
+
+function test_interface(alg, prob1, prob2)
+    A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
+    A2 = prob2.A; b2 = prob2.b; x2 = prob2.u0
+
+    y = solve(prob1, alg; cache_kwargs...)
+    @test A1 *  y  ≈ b1
+
+    cache = SciMLBase.init(prob1,alg; cache_kwargs...) # initialize cache
+    y = solve(cache)
+    @test A1 *  y  ≈ b1
+
+    cache = LinearSolve.set_A(cache,copy(A2))
+    y = solve(cache)
+    @test A2 *  y  ≈ b1
+
+    cache = LinearSolve.set_b(cache,b2)
+    y = solve(cache)
+    @test A2 *  y  ≈ b2
+
+    return
+end
+
+test_interface(CudaOffloadFactorization(), prob1, prob2)
+
+A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
+y = solve(prob1)
+@test A1 *  y  ≈ b1
diff --git a/lib/LinearSolvePardiso/Project.toml b/lib/LinearSolvePardiso/Project.toml
@@ -0,0 +1,22 @@
+name = "LinearSolvePardiso"
+uuid = "a6722589-28b8-4472-944e-bde9ee6da670"
+authors = ["SciML"]
+version = "0.1"
+
+[deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"
+SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
+
+[compat]
+SciMLBase = "1.25"
+LinearSolve = "1"
+Pardiso =  "0.5"
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/src/pardiso.jl → ...earSolvePardiso/src/LinearSolvePardiso.jl b/src/pardiso.jl → ...earSolvePardiso/src/LinearSolvePardiso.jl
@@ -1,33 +1,37 @@
-Base.@kwdef struct PardisoJL <: SciMLLinearSolveAlgorithm
-    nprocs::Union{Int, Nothing} = nothing
-    solver_type::Union{Int, Pardiso.Solver, Nothing} = nothing
-    matrix_type::Union{Int, Pardiso.MatrixType, Nothing} = nothing
-    iparm::Union{Vector{Tuple{Int,Int}}, Nothing} = nothing
-    dparm::Union{Vector{Tuple{Int,Int}}, Nothing} = nothing
+module LinearSolvePardiso
+
+using Pardiso, LinearSolve, SciMLBase
+
+Base.@kwdef struct PardisoJL <: SciMLBase.SciMLLinearSolveAlgorithm
+    nprocs::Union{Int,Nothing} = nothing
+    solver_type::Union{Int,Pardiso.Solver,Nothing} = nothing
+    matrix_type::Union{Int,Pardiso.MatrixType,Nothing} = nothing
+    iparm::Union{Vector{Tuple{Int,Int}},Nothing} = nothing
+    dparm::Union{Vector{Tuple{Int,Int}},Nothing} = nothing
 end
 
-MKLPardisoFactorize(;kwargs...) = PardisoJL(;solver_type = 0,kwargs...)
-MKLPardisoIterate(;kwargs...) = PardisoJL(;solver_type = 1,kwargs...)
-needs_concrete_A(alg::PardisoJL) = true
+MKLPardisoFactorize(; kwargs...) = PardisoJL(; solver_type=0, kwargs...)
+MKLPardisoIterate(; kwargs...) = PardisoJL(; solver_type=1, kwargs...)
+LinearSolve.needs_concrete_A(alg::PardisoJL) = true
 
 # TODO schur complement functionality
 
-function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol, verbose)
+function LinearSolve.init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol, verbose)
     @unpack nprocs, solver_type, matrix_type, iparm, dparm = alg
-    A = convert(AbstractMatrix,A)
+    A = convert(AbstractMatrix, A)
 
     solver =
-    if Pardiso.PARDISO_LOADED[]
-        solver = Pardiso.PardisoSolver()
-        solver_type !== nothing && Pardiso.set_solver!(solver, solver_type)
+        if Pardiso.PARDISO_LOADED[]
+            solver = Pardiso.PardisoSolver()
+            solver_type !== nothing && Pardiso.set_solver!(solver, solver_type)
 
-        solver
-    else
-        solver = Pardiso.MKLPardisoSolver()
-        nprocs !== nothing && Pardiso.set_nprocs!(solver, nprocs)
+            solver
+        else
+            solver = Pardiso.MKLPardisoSolver()
+            nprocs !== nothing && Pardiso.set_nprocs!(solver, nprocs)
 
-        solver
-    end
+            solver
+        end
 
     Pardiso.pardisoinit(solver) # default initialization
 
@@ -58,7 +62,7 @@ function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol
     end
 
     # Make sure to say it's transposed because its CSC not CSR
-    Pardiso.set_iparm!(solver,12, 1)
+    Pardiso.set_iparm!(solver, 12, 1)
 
     #=
     Note: It is recommended to use IPARM(11)=1 (scaling) and IPARM(13)=1 (matchings) for
@@ -71,8 +75,8 @@ function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol
     be changed to Pardiso.ANALYSIS_NUM_FACT in the solver loop otherwise instabilities
     occur in the example https://github.com/SciML/OrdinaryDiffEq.jl/issues/1569
     =#
-    Pardiso.set_iparm!(solver,11, 0)
-    Pardiso.set_iparm!(solver,13, 0)
+    Pardiso.set_iparm!(solver, 11, 0)
+    Pardiso.set_iparm!(solver, 13, 0)
 
     Pardiso.set_phase!(solver, Pardiso.ANALYSIS)
 
@@ -81,17 +85,17 @@ function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol
         # applies these exact factors L and U for the next steps in a
         # preconditioned Krylov-Subspace iteration. If the iteration does not
         # converge, the solver will automatically switch back to the numerical factorization.
-        Pardiso.set_iparm!(solver,3,round(Int,abs(log10(reltol)),RoundDown) * 10 + 1)
+        Pardiso.set_iparm!(solver, 3, round(Int, abs(log10(reltol)), RoundDown) * 10 + 1)
     end
 
     Pardiso.pardiso(solver, u, A, b)
 
     return solver
 end
 
-function SciMLBase.solve(cache::LinearCache, alg::PardisoJL; kwargs...)
+function SciMLBase.solve(cache::LinearSolve.LinearCache, alg::PardisoJL; kwargs...)
     @unpack A, b, u = cache
-    A = convert(AbstractMatrix,A)
+    A = convert(AbstractMatrix, A)
 
     if cache.isfresh
         Pardiso.set_phase!(cache.cacheval, Pardiso.NUM_FACT)
@@ -100,10 +104,12 @@ function SciMLBase.solve(cache::LinearCache, alg::PardisoJL; kwargs...)
 
     Pardiso.set_phase!(cache.cacheval, Pardiso.SOLVE_ITERATIVE_REFINE)
     Pardiso.pardiso(cache.cacheval, u, A, b)
-    return SciMLBase.build_linear_solution(alg,cache.u,nothing,cache)
+    return SciMLBase.build_linear_solution(alg, cache.u, nothing, cache)
 end
 
 # Add finalizer to release memory
 # Pardiso.set_phase!(cache.cacheval, Pardiso.RELEASE_ALL)
 
 export PardisoJL, MKLPardisoFactorize, MKLPardisoIterate
+
+end