From aca3737a25111ccf96f85bb9e997d3b4c4e09881 Mon Sep 17 00:00:00 2001
From: ChrisRackauckas <me@chrisrackauckas.com>
Date: Thu, 20 Nov 2025 08:49:44 -0500
Subject: [PATCH 1/3] Fix default algorithm for sparse CUDA matrices to
 LUFactorization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously:
- CuSparseMatrixCSC defaulted to GenericLU (not GPU compatible)
- CuSparseMatrixCSR fell back to Krylov when CUDSS unavailable (not GPU compatible)
- Symmetric sparse matrices defaulted to Cholesky (not GPU compatible)

Changes:
- Both CuSparseMatrixCSC and CuSparseMatrixCSR now default to LUFactorization
- Removed Krylov fallback - if CUDSS is not loaded, clear error message is shown
- Added cudss_loaded() support for CuSparseMatrixCSC
- Added error_no_cudss_lu() for CuSparseMatrixCSC

Fixes #827

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 ext/LinearSolveCUDAExt.jl  | 22 +++++++++++++---------
 ext/LinearSolveCUDSSExt.jl |  1 +
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl
index 58e2c3444..3bcbc7354 100644
--- a/ext/LinearSolveCUDAExt.jl
+++ b/ext/LinearSolveCUDAExt.jl
@@ -20,15 +20,12 @@ end
 
 function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
         assump::OperatorAssumptions{Bool}) where {Tv, Ti}
-    if LinearSolve.cudss_loaded(A)
-        LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
-    else
-        if !LinearSolve.ALREADY_WARNED_CUDSS[]
-            @warn("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library. Falling back to Krylov")
-            LinearSolve.ALREADY_WARNED_CUDSS[] = true
-        end
-        LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES)
-    end
+    LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
+end
+
+function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}, b,
+        assump::OperatorAssumptions{Bool}) where {Tv, Ti}
+    LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
 end
 
 function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)
@@ -38,6 +35,13 @@ function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)
     nothing
 end
 
+function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSC)
+    if !LinearSolve.cudss_loaded(A)
+        error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSC. Please load this library.")
+    end
+    nothing
+end
+
 function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadLUFactorization;
         kwargs...)
     if cache.isfresh
diff --git a/ext/LinearSolveCUDSSExt.jl b/ext/LinearSolveCUDSSExt.jl
index 506ada99a..bbcf635f0 100644
--- a/ext/LinearSolveCUDSSExt.jl
+++ b/ext/LinearSolveCUDSSExt.jl
@@ -4,5 +4,6 @@ using LinearSolve: LinearSolve, cudss_loaded
 using CUDSS
 
 LinearSolve.cudss_loaded(A::CUDSS.CUDA.CUSPARSE.CuSparseMatrixCSR) = true
+LinearSolve.cudss_loaded(A::CUDSS.CUDA.CUSPARSE.CuSparseMatrixCSC) = true
 
 end

From a0668dee2a4706c28b6b523b34f4d4453800897b Mon Sep 17 00:00:00 2001
From: ChrisRackauckas <me@chrisrackauckas.com>
Date: Thu, 20 Nov 2025 08:52:14 -0500
Subject: [PATCH 2/3] Apply JuliaFormatter with SciMLStyle to changed files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 ext/LinearSolveCUDAExt.jl | 40 +++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl
index 3bcbc7354..75b6146bd 100644
--- a/ext/LinearSolveCUDAExt.jl
+++ b/ext/LinearSolveCUDAExt.jl
@@ -5,9 +5,11 @@ using LinearSolve: LinearSolve, is_cusparse, defaultalg, cudss_loaded, DefaultLi
                    DefaultAlgorithmChoice, ALREADY_WARNED_CUDSS, LinearCache,
                    needs_concrete_A,
                    error_no_cudss_lu, init_cacheval, OperatorAssumptions,
-                   CudaOffloadFactorization, CudaOffloadLUFactorization, CudaOffloadQRFactorization,
+                   CudaOffloadFactorization, CudaOffloadLUFactorization,
+                   CudaOffloadQRFactorization,
                    CUDAOffload32MixedLUFactorization,
-                   SparspakFactorization, KLUFactorization, UMFPACKFactorization, LinearVerbosity
+                   SparspakFactorization, KLUFactorization, UMFPACKFactorization,
+                   LinearVerbosity
 using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface
 using SciMLBase: AbstractSciMLOperator
 
@@ -56,14 +58,15 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadLUFact
     SciMLBase.build_linear_solution(alg, y, nothing, cache)
 end
 
-function LinearSolve.init_cacheval(alg::CudaOffloadLUFactorization, A::AbstractArray, b, u, Pl, Pr,
+function LinearSolve.init_cacheval(
+        alg::CudaOffloadLUFactorization, A::AbstractArray, b, u, Pl, Pr,
         maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool},
         assumptions::OperatorAssumptions)
     # Check if CUDA is functional before creating CUDA arrays
     if !CUDA.functional()
         return nothing
     end
-    
+
     T = eltype(A)
     noUnitT = typeof(zero(T))
     luT = LinearAlgebra.lutype(noUnitT)
@@ -91,7 +94,7 @@ function LinearSolve.init_cacheval(alg::CudaOffloadQRFactorization, A, b, u, Pl,
     if !CUDA.functional()
         return nothing
     end
-    
+
     qr(CUDA.CuArray(A))
 end
 
@@ -108,7 +111,8 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadFactor
     SciMLBase.build_linear_solution(alg, y, nothing, cache)
 end
 
-function LinearSolve.init_cacheval(alg::CudaOffloadFactorization, A::AbstractArray, b, u, Pl, Pr,
+function LinearSolve.init_cacheval(
+        alg::CudaOffloadFactorization, A::AbstractArray, b, u, Pl, Pr,
         maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool},
         assumptions::OperatorAssumptions)
     qr(CUDA.CuArray(A))
@@ -116,27 +120,33 @@ end
 
 function LinearSolve.init_cacheval(
         ::SparspakFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u,
-        Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
+        Pl, Pr, maxiters::Int, abstol, reltol,
+        verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
     nothing
 end
 
 function LinearSolve.init_cacheval(
         ::KLUFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u,
-        Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
+        Pl, Pr, maxiters::Int, abstol, reltol,
+        verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
     nothing
 end
 
 function LinearSolve.init_cacheval(
         ::UMFPACKFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u,
-        Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
+        Pl, Pr, maxiters::Int, abstol, reltol,
+        verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
     nothing
 end
 
 # Mixed precision CUDA LU implementation
-function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CUDAOffload32MixedLUFactorization;
+function SciMLBase.solve!(
+        cache::LinearSolve.LinearCache, alg::CUDAOffload32MixedLUFactorization;
         kwargs...)
     if cache.isfresh
-        fact, A_gpu_f32, b_gpu_f32, u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization)
+        fact, A_gpu_f32,
+        b_gpu_f32,
+        u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization)
         # Compute 32-bit type on demand and convert
         T32 = eltype(cache.A) <: Complex ? ComplexF32 : Float32
         A_f32 = T32.(cache.A)
@@ -145,12 +155,14 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CUDAOffload32Mixe
         cache.cacheval = (fact, A_gpu_f32, b_gpu_f32, u_gpu_f32)
         cache.isfresh = false
     end
-    fact, A_gpu_f32, b_gpu_f32, u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization)
-    
+    fact, A_gpu_f32,
+    b_gpu_f32,
+    u_gpu_f32 = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization)
+
     # Compute types on demand for conversions
     T32 = eltype(cache.A) <: Complex ? ComplexF32 : Float32
     Torig = eltype(cache.u)
-    
+
     # Convert b to Float32, solve, then convert back to original precision
     b_f32 = T32.(cache.b)
     copyto!(b_gpu_f32, b_f32)

From f6c26d805723a3cf6576bb3c426aaecb08c78429 Mon Sep 17 00:00:00 2001
From: ChrisRackauckas <me@chrisrackauckas.com>
Date: Thu, 20 Nov 2025 08:59:17 -0500
Subject: [PATCH 3/3] Check cudss_loaded() and error instead of falling back to
 Krylov
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updated defaultalg() to check if CUDSS is loaded and error immediately
if not available, rather than removing the check entirely or falling
back to Krylov (which also doesn't work on GPU).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 ext/LinearSolveCUDAExt.jl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl
index 75b6146bd..e446094e1 100644
--- a/ext/LinearSolveCUDAExt.jl
+++ b/ext/LinearSolveCUDAExt.jl
@@ -22,12 +22,20 @@ end
 
 function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
         assump::OperatorAssumptions{Bool}) where {Tv, Ti}
-    LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
+    if LinearSolve.cudss_loaded(A)
+        LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
+    else
+        error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.")
+    end
 end
 
 function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}, b,
         assump::OperatorAssumptions{Bool}) where {Tv, Ti}
-    LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
+    if LinearSolve.cudss_loaded(A)
+        LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization)
+    else
+        error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSC. Please load this library.")
+    end
 end
 
 function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)