Skip to content

Commit

Permalink
Merge 4fac168 into 1bb2a3a
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisRackauckas committed May 18, 2022
2 parents 1bb2a3a + 4fac168 commit c13c4ea
Show file tree
Hide file tree
Showing 16 changed files with 245 additions and 196 deletions.
4 changes: 2 additions & 2 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
steps:
- label: "GPU"
- label: "LinearSolveCUDA"
plugins:
- JuliaCI/julia#v1:
version: "1"
Expand All @@ -9,7 +9,7 @@ steps:
queue: "juliagpu"
cuda: "*"
env:
GROUP: 'GPU'
GROUP: 'LinearSolveCUDA'
JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
# SECRET_CODECOV_TOKEN: "..."
timeout_in_minutes: 30
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
matrix:
group:
- All
- LinearSolvePardiso
version:
- '1'
- '1.6'
Expand Down
7 changes: 3 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ version = "1.15.0"
[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
KLU = "ef3ab10e-7fda-4108-b977-705223b18434"
Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Expand All @@ -23,24 +23,23 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
[compat]
ArrayInterface = "3, 4, 5"
DocStringExtensions = "0.8"
GPUArrays = "8"
IterativeSolvers = "0.9.2"
KLU = "0.3.0"
Krylov = "0.7.11, 0.8"
KrylovKit = "0.5"
RecursiveFactorization = "0.2.8"
Reexport = "1"
Requires = "1"
SciMLBase = "1.25"
Setfield = "0.7, 0.8"
UnPack = "1"
julia = "1.6"

[extras]
Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Pardiso", "Pkg", "Random", "SafeTestsets"]
test = ["Test", "Pkg", "Random", "SafeTestsets"]
19 changes: 16 additions & 3 deletions docs/src/solvers/solvers.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ customized per-package, details given below describe a subset of important array
factorization does not use pivoting during the numerical factorization and therefore the
procedure can fail even for invertible matrices.

### LinearSolve.jl

LinearSolve.jl contains some linear solvers built in.

- `SimpleLUFactorization`: a simple LU-factorization implementation without BLAS. Fast for small matrices.

### SuiteSparse.jl

By default, the SuiteSparse.jl are implemented for efficiency by caching the
Expand All @@ -92,8 +98,11 @@ be used in a context where that assumption does not hold, set `reuse_symbolic=fa

### Pardiso.jl

This package is not loaded by default. Thus in order to use this package you
must first `using Pardiso`. The following algorithms are pre-specified:
!!! note

Using this solver requires adding the package LinearSolvePardiso.jl

The following algorithms are pre-specified:

- `MKLPardisoFactorize(;kwargs...)`: A sparse factorization method.
- `MKLPardisoIterate(;kwargs...)`: A mixed factorization+iterative method.
Expand Down Expand Up @@ -128,7 +137,11 @@ end
Note that `CuArrays` are supported by `GenericFactorization` in the "normal" way.
The following are non-standard GPU factorization routines.

- `GPUOffloadFactorization()`: An offloading technique used to GPU-accelerate CPU-based
!!! note

Using this solver requires adding the package LinearSolveCUDA.jl

- `CudaOffloadFactorization()`: An offloading technique used to GPU-accelerate CPU-based
computations. Requires a sufficiently large `A` to overcome the data transfer
costs.

Expand Down
22 changes: 22 additions & 0 deletions lib/LinearSolveCUDA/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name = "LinearSolveCUDA"
uuid = "0d26bed2-170e-468a-8415-1cfcbba6e180"
authors = ["SciML"]
version = "0.1"

[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"

[compat]
CUDA = "3"
SciMLBase = "1.25"
LinearSolve = "1"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
31 changes: 31 additions & 0 deletions lib/LinearSolveCUDA/src/LinearSolveCUDA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
module LinearSolveCUDA

using CUDA, LinearAlgebra, LinearSolve, SciMLBase

struct CudaOffloadFactorization <: LinearSolve.AbstractFactorization end

function SciMLBase.solve(cache::LinearSolve.LinearCache, alg::CudaOffloadFactorization; kwargs...)
if cache.isfresh
fact = LinearSolve.do_factorization(alg, CUDA.CuArray(cache.A), cache.b, cache.u)
cache = LinearSolve.set_cacheval(cache, fact)
end

copyto!(cache.u, cache.b)
y = Array(ldiv!(cache.cacheval, CUDA.CuArray(cache.u)))
SciMLBase.build_linear_solution(alg, y, nothing, cache)
end

function LinearSolve.do_factorization(alg::CudaOffloadFactorization, A, b, u)
A isa Union{AbstractMatrix,AbstractDiffEqOperator} ||
error("LU is not defined for $(typeof(A))")

if A isa AbstractDiffEqOperator
A = A.A
end
fact = qr(CUDA.CuArray(A))
return fact
end

export CudaOffloadFactorization

end
83 changes: 41 additions & 42 deletions test/cuda.jl → lib/LinearSolveCUDA/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,42 +1,41 @@
using LinearSolve, LinearAlgebra, SparseArrays
using CUDA
using Test

n = 8
A = Matrix(I,n,n)
b = ones(n)
A1 = A/1; b1 = rand(n); x1 = zero(b)
A2 = A/2; b2 = rand(n); x2 = zero(b)

prob1 = LinearProblem(A1, b1; u0=x1)
prob2 = LinearProblem(A2, b2; u0=x2)

cache_kwargs = (;verbose=true, abstol=1e-8, reltol=1e-8, maxiter=30,)

function test_interface(alg, prob1, prob2)
A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
A2 = prob2.A; b2 = prob2.b; x2 = prob2.u0

y = solve(prob1, alg; cache_kwargs...)
@test A1 * y b1

cache = SciMLBase.init(prob1,alg; cache_kwargs...) # initialize cache
y = solve(cache)
@test A1 * y b1

cache = LinearSolve.set_A(cache,copy(A2))
y = solve(cache)
@test A2 * y b1

cache = LinearSolve.set_b(cache,b2)
y = solve(cache)
@test A2 * y b2

return
end

test_interface(GPUOffloadFactorization(), prob1, prob2)

A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
y = solve(prob1)
@test A1 * y b1
using LinearSolve, LinearSolveCUDA, LinearAlgebra, SparseArrays
using Test

n = 8
A = Matrix(I,n,n)
b = ones(n)
A1 = A/1; b1 = rand(n); x1 = zero(b)
A2 = A/2; b2 = rand(n); x2 = zero(b)

prob1 = LinearProblem(A1, b1; u0=x1)
prob2 = LinearProblem(A2, b2; u0=x2)

cache_kwargs = (;verbose=true, abstol=1e-8, reltol=1e-8, maxiter=30,)

function test_interface(alg, prob1, prob2)
A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
A2 = prob2.A; b2 = prob2.b; x2 = prob2.u0

y = solve(prob1, alg; cache_kwargs...)
@test A1 * y b1

cache = SciMLBase.init(prob1,alg; cache_kwargs...) # initialize cache
y = solve(cache)
@test A1 * y b1

cache = LinearSolve.set_A(cache,copy(A2))
y = solve(cache)
@test A2 * y b1

cache = LinearSolve.set_b(cache,b2)
y = solve(cache)
@test A2 * y b2

return
end

test_interface(CudaOffloadFactorization(), prob1, prob2)

A1 = prob1.A; b1 = prob1.b; x1 = prob1.u0
y = solve(prob1)
@test A1 * y b1
22 changes: 22 additions & 0 deletions lib/LinearSolvePardiso/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name = "LinearSolvePardiso"
uuid = "a6722589-28b8-4472-944e-bde9ee6da670"
authors = ["SciML"]
version = "0.1"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"

[compat]
SciMLBase = "1.25"
LinearSolve = "1"
Pardiso = "0.5"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
60 changes: 33 additions & 27 deletions src/pardiso.jl → ...earSolvePardiso/src/LinearSolvePardiso.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,37 @@
Base.@kwdef struct PardisoJL <: SciMLLinearSolveAlgorithm
nprocs::Union{Int, Nothing} = nothing
solver_type::Union{Int, Pardiso.Solver, Nothing} = nothing
matrix_type::Union{Int, Pardiso.MatrixType, Nothing} = nothing
iparm::Union{Vector{Tuple{Int,Int}}, Nothing} = nothing
dparm::Union{Vector{Tuple{Int,Int}}, Nothing} = nothing
module LinearSolvePardiso

using Pardiso, LinearSolve, SciMLBase

Base.@kwdef struct PardisoJL <: SciMLBase.SciMLLinearSolveAlgorithm
nprocs::Union{Int,Nothing} = nothing
solver_type::Union{Int,Pardiso.Solver,Nothing} = nothing
matrix_type::Union{Int,Pardiso.MatrixType,Nothing} = nothing
iparm::Union{Vector{Tuple{Int,Int}},Nothing} = nothing
dparm::Union{Vector{Tuple{Int,Int}},Nothing} = nothing
end

MKLPardisoFactorize(;kwargs...) = PardisoJL(;solver_type = 0,kwargs...)
MKLPardisoIterate(;kwargs...) = PardisoJL(;solver_type = 1,kwargs...)
needs_concrete_A(alg::PardisoJL) = true
MKLPardisoFactorize(; kwargs...) = PardisoJL(; solver_type=0, kwargs...)
MKLPardisoIterate(; kwargs...) = PardisoJL(; solver_type=1, kwargs...)
LinearSolve.needs_concrete_A(alg::PardisoJL) = true

# TODO schur complement functionality

function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol, verbose)
function LinearSolve.init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol, verbose)
@unpack nprocs, solver_type, matrix_type, iparm, dparm = alg
A = convert(AbstractMatrix,A)
A = convert(AbstractMatrix, A)

solver =
if Pardiso.PARDISO_LOADED[]
solver = Pardiso.PardisoSolver()
solver_type !== nothing && Pardiso.set_solver!(solver, solver_type)
if Pardiso.PARDISO_LOADED[]
solver = Pardiso.PardisoSolver()
solver_type !== nothing && Pardiso.set_solver!(solver, solver_type)

solver
else
solver = Pardiso.MKLPardisoSolver()
nprocs !== nothing && Pardiso.set_nprocs!(solver, nprocs)
solver
else
solver = Pardiso.MKLPardisoSolver()
nprocs !== nothing && Pardiso.set_nprocs!(solver, nprocs)

solver
end
solver
end

Pardiso.pardisoinit(solver) # default initialization

Expand Down Expand Up @@ -58,7 +62,7 @@ function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol
end

# Make sure to say it's transposed because its CSC not CSR
Pardiso.set_iparm!(solver,12, 1)
Pardiso.set_iparm!(solver, 12, 1)

#=
Note: It is recommended to use IPARM(11)=1 (scaling) and IPARM(13)=1 (matchings) for
Expand All @@ -71,8 +75,8 @@ function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol
be changed to Pardiso.ANALYSIS_NUM_FACT in the solver loop otherwise instabilities
occur in the example https://github.com/SciML/OrdinaryDiffEq.jl/issues/1569
=#
Pardiso.set_iparm!(solver,11, 0)
Pardiso.set_iparm!(solver,13, 0)
Pardiso.set_iparm!(solver, 11, 0)
Pardiso.set_iparm!(solver, 13, 0)

Pardiso.set_phase!(solver, Pardiso.ANALYSIS)

Expand All @@ -81,17 +85,17 @@ function init_cacheval(alg::PardisoJL, A, b, u, Pl, Pr, maxiters, abstol, reltol
# applies these exact factors L and U for the next steps in a
# preconditioned Krylov-Subspace iteration. If the iteration does not
# converge, the solver will automatically switch back to the numerical factorization.
Pardiso.set_iparm!(solver,3,round(Int,abs(log10(reltol)),RoundDown) * 10 + 1)
Pardiso.set_iparm!(solver, 3, round(Int, abs(log10(reltol)), RoundDown) * 10 + 1)
end

Pardiso.pardiso(solver, u, A, b)

return solver
end

function SciMLBase.solve(cache::LinearCache, alg::PardisoJL; kwargs...)
function SciMLBase.solve(cache::LinearSolve.LinearCache, alg::PardisoJL; kwargs...)
@unpack A, b, u = cache
A = convert(AbstractMatrix,A)
A = convert(AbstractMatrix, A)

if cache.isfresh
Pardiso.set_phase!(cache.cacheval, Pardiso.NUM_FACT)
Expand All @@ -100,10 +104,12 @@ function SciMLBase.solve(cache::LinearCache, alg::PardisoJL; kwargs...)

Pardiso.set_phase!(cache.cacheval, Pardiso.SOLVE_ITERATIVE_REFINE)
Pardiso.pardiso(cache.cacheval, u, A, b)
return SciMLBase.build_linear_solution(alg,cache.u,nothing,cache)
return SciMLBase.build_linear_solution(alg, cache.u, nothing, cache)
end

# Add finalizer to release memory
# Pardiso.set_phase!(cache.cacheval, Pardiso.RELEASE_ALL)

export PardisoJL, MKLPardisoFactorize, MKLPardisoIterate

end
Loading

0 comments on commit c13c4ea

Please sign in to comment.