From aca3737a25111ccf96f85bb9e997d3b4c4e09881 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 20 Nov 2025 08:49:44 -0500 Subject: [PATCH 1/3] Fix default algorithm for sparse CUDA matrices to LUFactorization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously: - CuSparseMatrixCSC defaulted to GenericLU (not GPU compatible) - CuSparseMatrixCSR fell back to Krylov when CUDSS unavailable (not GPU compatible) - Symmetric sparse matrices defaulted to Cholesky (not GPU compatible) Changes: - Both CuSparseMatrixCSC and CuSparseMatrixCSR now default to LUFactorization - Removed Krylov fallback - if CUDSS is not loaded, clear error message is shown - Added cudss_loaded() support for CuSparseMatrixCSC - Added error_no_cudss_lu() for CuSparseMatrixCSC Fixes #827 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- ext/LinearSolveCUDAExt.jl | 22 +++++++++++++--------- ext/LinearSolveCUDSSExt.jl | 1 + 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index 58e2c3444..3bcbc7354 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -20,15 +20,12 @@ end function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b, assump::OperatorAssumptions{Bool}) where {Tv, Ti} - if LinearSolve.cudss_loaded(A) - LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) - else - if !LinearSolve.ALREADY_WARNED_CUDSS[] - @warn("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library. Falling back to Krylov") - LinearSolve.ALREADY_WARNED_CUDSS[] = true - end - LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES) - end + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) +end + +function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}, b, + assump::OperatorAssumptions{Bool}) where {Tv, Ti} + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) end function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR) @@ -38,6 +35,13 @@ function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR) nothing end +function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSC) + if !LinearSolve.cudss_loaded(A) + error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSC. Please load this library.") + end + nothing +end + function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadLUFactorization; kwargs...) if cache.isfresh diff --git a/ext/LinearSolveCUDSSExt.jl b/ext/LinearSolveCUDSSExt.jl index 506ada99a..bbcf635f0 100644 --- a/ext/LinearSolveCUDSSExt.jl +++ b/ext/LinearSolveCUDSSExt.jl @@ -4,5 +4,6 @@ using LinearSolve: LinearSolve, cudss_loaded using CUDSS LinearSolve.cudss_loaded(A::CUDSS.CUDA.CUSPARSE.CuSparseMatrixCSR) = true +LinearSolve.cudss_loaded(A::CUDSS.CUDA.CUSPARSE.CuSparseMatrixCSC) = true end From a0668dee2a4706c28b6b523b34f4d4453800897b Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 20 Nov 2025 08:52:14 -0500 Subject: [PATCH 2/3] Apply JuliaFormatter with SciMLStyle to changed files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- ext/LinearSolveCUDAExt.jl | 40 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index 3bcbc7354..75b6146bd 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -5,9 +5,11 @@ using LinearSolve: LinearSolve, is_cusparse, defaultalg, cudss_loaded, DefaultLi DefaultAlgorithmChoice, ALREADY_WARNED_CUDSS, LinearCache, needs_concrete_A, error_no_cudss_lu, init_cacheval, OperatorAssumptions, - CudaOffloadFactorization, CudaOffloadLUFactorization, CudaOffloadQRFactorization, + CudaOffloadFactorization, CudaOffloadLUFactorization, + CudaOffloadQRFactorization, CUDAOffload32MixedLUFactorization, - SparspakFactorization, KLUFactorization, UMFPACKFactorization, LinearVerbosity + SparspakFactorization, KLUFactorization, UMFPACKFactorization, + LinearVerbosity using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface using SciMLBase: AbstractSciMLOperator @@ -56,14 +58,15 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadLUFact SciMLBase.build_linear_solution(alg, y, nothing, cache) end -function LinearSolve.init_cacheval(alg::CudaOffloadLUFactorization, A::AbstractArray, b, u, Pl, Pr, +function LinearSolve.init_cacheval( + alg::CudaOffloadLUFactorization, A::AbstractArray, b, u, Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) # Check if CUDA is functional before creating CUDA arrays if !CUDA.functional() return nothing end - + T = eltype(A) noUnitT = typeof(zero(T)) luT = LinearAlgebra.lutype(noUnitT) @@ -91,7 +94,7 @@ function LinearSolve.init_cacheval(alg::CudaOffloadQRFactorization, A, b, u, Pl, if !CUDA.functional() return nothing end - + qr(CUDA.CuArray(A)) end @@ -108,7 +111,8 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadFactor SciMLBase.build_linear_solution(alg, y, nothing, cache) end -function LinearSolve.init_cacheval(alg::CudaOffloadFactorization, A::AbstractArray, b, u, Pl, Pr, +function LinearSolve.init_cacheval( + alg::CudaOffloadFactorization, A::AbstractArray, b, u, Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) qr(CUDA.CuArray(A)) @@ -116,27 +120,33 @@ end function LinearSolve.init_cacheval( ::SparspakFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u, - Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) + Pl, Pr, maxiters::Int, abstol, reltol, + verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) nothing end function LinearSolve.init_cacheval( ::KLUFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u, - Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) + Pl, Pr, maxiters::Int, abstol, reltol, + verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) nothing end function LinearSolve.init_cacheval( ::UMFPACKFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u, - Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) + Pl, Pr, maxiters::Int, abstol, reltol, + verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) nothing end # Mixed precision CUDA LU implementation -function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CUDAOffload32MixedLUFactorization; +function SciMLBase.solve!( + cache::LinearSolve.LinearCache, alg::CUDAOffload32MixedLUFactorization; kwargs...) if cache.isfresh - fact, A_gpu_f32, b_gpu_f32, u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization) + fact, A_gpu_f32, + b_gpu_f32, + u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization) # Compute 32-bit type on demand and convert T32 = eltype(cache.A) <: Complex ? ComplexF32 : Float32 A_f32 = T32.(cache.A) @@ -145,12 +155,14 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CUDAOffload32Mixe cache.cacheval = (fact, A_gpu_f32, b_gpu_f32, u_gpu_f32) cache.isfresh = false end - fact, A_gpu_f32, b_gpu_f32, u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization) - + fact, A_gpu_f32, + b_gpu_f32, + u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization) + # Compute types on demand for conversions T32 = eltype(cache.A) <: Complex ? ComplexF32 : Float32 Torig = eltype(cache.u) - + # Convert b to Float32, solve, then convert back to original precision b_f32 = T32.(cache.b) copyto!(b_gpu_f32, b_f32) From f6c26d805723a3cf6576bb3c426aaecb08c78429 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 20 Nov 2025 08:59:17 -0500 Subject: [PATCH 3/3] Check cudss_loaded() and error instead of falling back to Krylov MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated defaultalg() to check if CUDSS is loaded and error immediately if not available, rather than removing the check entirely or falling back to Krylov (which also doesn't work on GPU). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- ext/LinearSolveCUDAExt.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index 75b6146bd..e446094e1 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -22,12 +22,20 @@ end function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b, assump::OperatorAssumptions{Bool}) where {Tv, Ti} - LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) + if LinearSolve.cudss_loaded(A) + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) + else + error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.") + end end function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}, b, assump::OperatorAssumptions{Bool}) where {Tv, Ti} - LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) + if LinearSolve.cudss_loaded(A) + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) + else + error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSC. Please load this library.") + end end function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)