From 3c666f76567ebd957cb1608c0e5edcebd016a361 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 14:44:28 -0600
Subject: [PATCH 1/8] [oneMKL] Add support for oneSparseMatrixCSC

---
 lib/mkl/array.jl           |  12 ++-
 lib/mkl/interfaces.jl      |  31 ++++--
 lib/mkl/utils.jl           |   3 +
 lib/mkl/wrappers_sparse.jl | 192 +++++++++++++++++++++++++++++++++++++
 test/onemkl.jl             | 183 ++++++++++++++++++++---------------
 5 files changed, 336 insertions(+), 85 deletions(-)

diff --git a/lib/mkl/array.jl b/lib/mkl/array.jl
index f97b807c..0db254e8 100644
--- a/lib/mkl/array.jl
+++ b/lib/mkl/array.jl
@@ -1,4 +1,4 @@
-export oneSparseMatrixCSR, oneSparseMatrixCOO
+export oneSparseMatrixCSR, oneSparseMatrixCSC, oneSparseMatrixCOO
 
 abstract type oneAbstractSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end
 const oneAbstractSparseVector{Tv, Ti} = oneAbstractSparseArray{Tv, Ti, 1}
@@ -13,6 +13,15 @@ mutable struct oneSparseMatrixCSR{Tv, Ti} <: oneAbstractSparseMatrix{Tv, Ti}
     nnz::Ti
 end
 
+mutable struct oneSparseMatrixCSC{Tv, Ti} <: oneAbstractSparseMatrix{Tv, Ti}
+    handle::matrix_handle_t
+    colPtr::oneVector{Ti}
+    rowVal::oneVector{Ti}
+    nzVal::oneVector{Tv}
+    dims::NTuple{2,Int}
+    nnz::Ti
+end
+
 mutable struct oneSparseMatrixCOO{Tv, Ti} <: oneAbstractSparseMatrix{Tv, Ti}
     handle::matrix_handle_t
     rowInd::oneVector{Ti}
@@ -37,6 +46,7 @@ SparseArrays.nnz(A::oneAbstractSparseMatrix) = A.nnz
 SparseArrays.nonzeros(A::oneAbstractSparseMatrix) = A.nzVal
 
 for (gpu, cpu) in [:oneSparseMatrixCSR => :SparseMatrixCSC,
+                   :oneSparseMatrixCSC => :SparseMatrixCSC,
                    :oneSparseMatrixCOO => :SparseMatrixCSC]
     @eval Base.show(io::IOContext, x::$gpu) =
         show(io, $cpu(x))
diff --git a/lib/mkl/interfaces.jl b/lib/mkl/interfaces.jl
index 725b120d..a4e5f83f 100644
--- a/lib/mkl/interfaces.jl
+++ b/lib/mkl/interfaces.jl
@@ -7,16 +7,35 @@ function LinearAlgebra.generic_matvecmul!(C::oneVector{T}, tA::AbstractChar, A::
     sparse_gemv!(tA, _add.alpha, A, B, _add.beta, C)
 end
 
+function LinearAlgebra.generic_matvecmul!(C::oneVector{T}, tA::AbstractChar, A::oneSparseMatrixCSC{T}, B::oneVector{T}, _add::MulAddMul) where T <: BlasReal
+    tA = tA in ('S', 's', 'H', 'h') ? 'T' : flip_trans(tA)
+    sparse_gemv!(tA, _add.alpha, A, B, _add.beta, C)
+end
+
 function LinearAlgebra.generic_matmatmul!(C::oneMatrix{T}, tA, tB, A::oneSparseMatrixCSR{T}, B::oneMatrix{T}, _add::MulAddMul) where T <: BlasFloat
     tA = tA in ('S', 's', 'H', 'h') ? 'N' : tA
     tB = tB in ('S', 's', 'H', 'h') ? 'N' : tB
     sparse_gemm!(tA, tB, _add.alpha, A, B, _add.beta, C)
 end
 
-for SparseMatrixType in (:oneSparseMatrixCSR,)
-    @eval begin
-        function LinearAlgebra.generic_trimatdiv!(C::oneVector{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::oneVector{T}) where T <: BlasFloat
-            sparse_trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
-        end
-    end
+function LinearAlgebra.generic_matmatmul!(C::oneMatrix{T}, tA, tB, A::oneSparseMatrixCSC{T}, B::oneMatrix{T}, _add::MulAddMul) where T <: BlasReal
+    tA = tA in ('S', 's', 'H', 'h') ? 'T' : flip_trans(tA)
+    tB = tB in ('S', 's', 'H', 'h') ? 'N' : tB
+    sparse_gemm!(tA, tB, _add.alpha, A, B, _add.beta, C)
+end
+
+function LinearAlgebra.generic_trimatdiv!(C::oneVector{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSR{T}, B::oneVector{T}) where T <: BlasFloat
+    sparse_trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
+end
+
+function LinearAlgebra.generic_trimatdiv!(C::oneVector{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSC{T}, B::oneVector{T}) where T <: BlasReal
+    sparse_trsv!(flip_uplo(uploc), tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
+end
+
+function LinearAlgebra.generic_trimatdiv!(C::oneMatrix{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSR{T}, B::oneMatrix{T}) where T <: BlasFloat
+    sparse_trsm!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', isunitc, one(T), A, B, C)
+end
+
+function LinearAlgebra.generic_trimatdiv!(C::oneMatrix{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSC{T}, B::oneMatrix{T}) where T <: BlasReal
+    sparse_trsm!(flip_uplo(uploc), tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', isunitc, one(T), A, B, C)
 end
diff --git a/lib/mkl/utils.jl b/lib/mkl/utils.jl
index bd317a24..ba8ddbcb 100644
--- a/lib/mkl/utils.jl
+++ b/lib/mkl/utils.jl
@@ -113,3 +113,6 @@ end
     ptrs = pointer.(batch)
     return oneArray(ptrs)
 end
+
+flip_trans(trans::Char) = trans == 'N' ? 'T' : 'N'
+flip_uplo(uplo::Char) = uplo == 'L' ? 'U' : 'L'
diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl
index f39cbd03..e4458357 100644
--- a/lib/mkl/wrappers_sparse.jl
+++ b/lib/mkl/wrappers_sparse.jl
@@ -35,6 +35,27 @@ for (fname, elty, intty) in ((:onemklSsparse_set_csr_data   , :Float32   , :Int3
             A_csc = SparseMatrixCSC(At |> transpose)
             return A_csc
         end
+
+        function oneSparseMatrixCSC(A::SparseMatrixCSC{$elty, $intty})
+            handle_ptr = Ref{matrix_handle_t}()
+            onemklXsparse_init_matrix_handle(handle_ptr)
+            m, n = size(A)
+            colPtr = oneVector{$intty}(A.colptr)
+            rowVal = oneVector{$intty}(A.rowval)
+            nzVal = oneVector{$elty}(A.nzval)
+            nnzA = length(A.nzval)
+            queue = global_queue(context(nzVal), device())
+            $fname(sycl_queue(queue), handle_ptr[], n, m, 'O', colPtr, rowVal, nzVal)  # CSC of A is CSR of Aᵀ
+            dA = oneSparseMatrixCSC{$elty, $intty}(handle_ptr[], colPtr, rowVal, nzVal, (m,n), nnzA)
+            finalizer(sparse_release_matrix_handle, dA)
+            return dA
+        end
+
+        function SparseMatrixCSC(A::oneSparseMatrixCSC{$elty, $intty})
+            handle_ptr = Ref{matrix_handle_t}()
+            A_csc = SparseMatrixCSC(A.dims..., Vector(A.colPtr), Vector(A.rowVal), Vector(A.nzVal))
+            return A_csc
+        end
     end
 end
 
@@ -100,6 +121,33 @@ for SparseMatrix in (:oneSparseMatrixCSR, :oneSparseMatrixCOO)
     end
 end
 
+for SparseMatrix in (:oneSparseMatrixCSC,)
+    for (fname, elty) in ((:onemklSsparse_gemv, :Float32),
+                          (:onemklDsparse_gemv, :Float64))
+        @eval begin
+            function sparse_gemv!(trans::Char,
+                                  alpha::Number,
+                                  A::$SparseMatrix{$elty},
+                                  x::oneStridedVector{$elty},
+                                  beta::Number,
+                                  y::oneStridedVector{$elty})
+
+                queue = global_queue(context(x), device())
+                $fname(sycl_queue(queue), flip_trans(trans), alpha, A.handle, x, beta, y)
+                y
+            end
+        end
+    end
+
+    @eval begin
+        function sparse_optimize_gemv!(trans::Char, A::$SparseMatrix)
+            queue = global_queue(context(A.nzVal), device(A.nzVal))
+            onemklXsparse_optimize_gemv(sycl_queue(queue), flip_trans(trans), A.handle)
+            return A
+        end
+    end
+end
+
 for (fname, elty) in ((:onemklSsparse_gemm, :Float32),
                       (:onemklDsparse_gemm, :Float64),
                       (:onemklCsparse_gemm, :ComplexF32),
@@ -139,6 +187,43 @@ function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSpars
     return A
 end
 
+for (fname, elty) in ((:onemklSsparse_gemm, :Float32),
+                      (:onemklDsparse_gemm, :Float64))
+    @eval begin
+        function sparse_gemm!(transa::Char,
+                              transb::Char,
+                              alpha::Number,
+                              A::oneSparseMatrixCSC{$elty},
+                              B::oneStridedMatrix{$elty},
+                              beta::Number,
+                              C::oneStridedMatrix{$elty})
+
+            mB, nB = size(B)
+            mC, nC = size(C)
+            (nB != nC) && (transb == 'N') && throw(ArgumentError("B and C must have the same number of columns."))
+            (mB != nC) && (transb != 'N') && throw(ArgumentError("Bᵀ and C must have the same number of columns."))
+            nrhs = size(B, 2)
+            ldb = max(1,stride(B,2))
+            ldc = max(1,stride(C,2))
+            queue = global_queue(context(C), device())
+            $fname(sycl_queue(queue), 'C', flip_trans(transa), transb, alpha, A.handle, B, nrhs, ldb, beta, C, ldc)
+            C
+        end
+    end
+end
+
+function sparse_optimize_gemm!(trans::Char, A::oneSparseMatrixCSC)
+    queue = global_queue(context(A.nzVal), device(A.nzVal))
+    onemklXsparse_optimize_gemm(sycl_queue(queue), flip_trans(trans), A.handle)
+    return A
+end
+
+function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSparseMatrixCSC)
+    queue = global_queue(context(A.nzVal), device(A.nzVal))
+    onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', filp_trans(trans), transB, A.handle, nrhs)
+    return A
+end
+
 for (fname, elty) in ((:onemklSsparse_symv, :Float32),
                       (:onemklDsparse_symv, :Float64),
                       (:onemklCsparse_symv, :ComplexF32),
@@ -158,6 +243,24 @@ for (fname, elty) in ((:onemklSsparse_symv, :Float32),
     end
 end
 
+
+for (fname, elty) in ((:onemklSsparse_symv, :Float32),
+                      (:onemklDsparse_symv, :Float64))
+    @eval begin
+        function sparse_symv!(uplo::Char,
+                              alpha::Number,
+                              A::oneSparseMatrixCSC{$elty},
+                              x::oneStridedVector{$elty},
+                              beta::Number,
+                              y::oneStridedVector{$elty})
+
+            queue = global_queue(context(y), device())
+            $fname(sycl_queue(queue), flip_uplo(uplo), alpha, A.handle, x, beta, y)
+            y
+        end
+    end
+end
+
 for (fname, elty) in ((:onemklSsparse_trmv, :Float32),
                       (:onemklDsparse_trmv, :Float64),
                       (:onemklCsparse_trmv, :ComplexF32),
@@ -185,6 +288,31 @@ function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparse
     return A
 end
 
+for (fname, elty) in ((:onemklSsparse_trmv, :Float32),
+                      (:onemklDsparse_trmv, :Float64))
+    @eval begin
+        function sparse_trmv!(uplo::Char,
+                              trans::Char,
+                              diag::Char,
+                              alpha::Number,
+                              A::oneSparseMatrixCSC{$elty},
+                              x::oneStridedVector{$elty},
+                              beta::Number,
+                              y::oneStridedVector{$elty})
+
+            queue = global_queue(context(y), device())
+            $fname(sycl_queue(queue), flip_uplo(uplo), trans, diag, alpha, A.handle, x, beta, y)
+            y
+        end
+    end
+end
+
+function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
+    queue = global_queue(context(A.nzVal), device(A.nzVal))
+    onemklXsparse_optimize_trmv(sycl_queue(queue), flip_uplo(uplo), trans, diag, A.handle)
+    return A
+end
+
 for (fname, elty) in ((:onemklSsparse_trsv, :Float32),
                       (:onemklDsparse_trsv, :Float64),
                       (:onemklCsparse_trsv, :ComplexF32),
@@ -211,6 +339,30 @@ function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparse
     return A
 end
 
+for (fname, elty) in ((:onemklSsparse_trsv, :Float32),
+                      (:onemklDsparse_trsv, :Float64))
+    @eval begin
+        function sparse_trsv!(uplo::Char,
+                              trans::Char,
+                              diag::Char,
+                              alpha::Number,
+                              A::oneSparseMatrixCSC{$elty},
+                              x::oneStridedVector{$elty},
+                              y::oneStridedVector{$elty})
+
+            queue = global_queue(context(y), device())
+            $fname(sycl_queue(queue), filp_uplo(uplo), trans, diag, alpha, A.handle, x, y)
+            y
+        end
+    end
+end
+
+function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
+    queue = global_queue(context(A.nzVal), device(A.nzVal))
+    onemklXsparse_optimize_trsv(sycl_queue(queue), flip_uplo(uplo), trans, diag, A.handle)
+    return A
+end
+
 for (fname, elty) in ((:onemklSsparse_trsm, :Float32),
                       (:onemklDsparse_trsm, :Float64),
                       (:onemklCsparse_trsm, :ComplexF32),
@@ -252,3 +404,43 @@ function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A
     onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', uplo, trans, diag, A.handle, nrhs)
     return A
 end
+
+for (fname, elty) in ((:onemklSsparse_trsm, :Float32),
+                      (:onemklDsparse_trsm, :Float64))
+    @eval begin
+        function sparse_trsm!(uplo::Char,
+                              transA::Char,
+                              transX::Char,
+                              diag::Char,
+                              alpha::Number,
+                              A::oneSparseMatrixCSC{$elty},
+                              X::oneStridedMatrix{$elty},
+                              Y::oneStridedMatrix{$elty})
+
+            mX, nX = size(X)
+            mY, nY = size(Y)
+            (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows."))
+            (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns."))
+            (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows."))
+            (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns."))
+            nrhs = size(X, 2)
+            ldx = max(1,stride(X,2))
+            ldy = max(1,stride(Y,2))
+            queue = global_queue(context(Y), device())
+            $fname(sycl_queue(queue), 'C', transA, transX, flip_uplo(uplo), diag, alpha, A.handle, X, nrhs, ldx, Y, ldy)
+            Y
+        end
+    end
+end
+
+function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
+    queue = global_queue(context(A.nzVal), device(A.nzVal))
+    onemklXsparse_optimize_trsm(sycl_queue(queue), filp_uplo(uplo), trans, diag, A.handle)
+    return A
+end
+
+function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSR)
+    queue = global_queue(context(A.nzVal), device(A.nzVal))
+    onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', flip_uplo(uplo), trans, diag, A.handle, nrhs)
+    return A
+end
diff --git a/test/onemkl.jl b/test/onemkl.jl
index e5b6541c..bb3be88c 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -1088,6 +1088,17 @@ end
             end
         end
 
+        @testset "oneSparseMatrixCSC" begin
+            (T isa Complex) && continue
+            for S in (Int32, Int64)
+                A = sprand(T, 20, 10, 0.5)
+                A = SparseMatrixCSC{T, S}(A)
+                B = oneSparseMatrixCSC(A)
+                A2 = SparseMatrixCSC(B)
+                @test A == A2
+            end
+        end
+
         @testset "oneSparseMatrixCOO" begin
             for S in (Int32, Int64)
                 A = sprand(T, 20, 10, 0.5)
@@ -1099,8 +1110,9 @@ end
         end
 
         @testset "sparse gemv" begin
-            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCOO, oneSparseMatrixCSR)
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCOO, oneSparseMatrixCSR, oneSparseMatrixCSC)
                 @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
+                    (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                     A = sprand(T, 20, 10, 0.5)
                     x = transa == 'N' ? rand(T, 10) : rand(T, 20)
                     y = transa == 'N' ? rand(T, 20) : rand(T, 10)
@@ -1119,119 +1131,134 @@ end
         end
 
         @testset "sparse gemm" begin
-            @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                @testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                    (transb == 'N') || continue
-                    A = sprand(T, 10, 10, 0.5)
-                    B = transb == 'N' ? rand(T, 10, 2) : rand(T, 2, 10)
-                    C = rand(T, 10, 2)
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+                @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
+                    @testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)]
+                        (transb == 'N') || continue
+                        (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                        A = sprand(T, 10, 10, 0.5)
+                        B = transb == 'N' ? rand(T, 10, 2) : rand(T, 2, 10)
+                        C = rand(T, 10, 2)
 
-                    dA = oneSparseMatrixCSR(A)
-                    dB = oneMatrix{T}(B)
-                    dC = oneMatrix{T}(C)
+                        dA = SparseMatrix(A)
+                        dB = oneMatrix{T}(B)
+                        dC = oneMatrix{T}(C)
 
-                    alpha = rand(T)
-                    beta = rand(T)
-                    oneMKL.sparse_optimize_gemm!(transa, dA)
-                    oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC)
-                    @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC)
+                        alpha = rand(T)
+                        beta = rand(T)
+                        oneMKL.sparse_optimize_gemm!(transa, dA)
+                        oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC)
+                        @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC)
+                    end
                 end
             end
         end
 
         @testset "sparse symv" begin
-            @testset "uplo = $uplo" for uplo in ('L', 'U')
-                A = sprand(T, 10, 10, 0.5)
-                A = A + A'
-                x = rand(T, 10)
-                y = rand(T, 10)
-
-                dA = uplo == 'L' ? oneSparseMatrixCSR(A |> tril) : oneSparseMatrixCSR(A |> triu)
-                dx = oneVector{T}(x)
-                dy = oneVector{T}(y)
-
-                alpha = rand(T)
-                beta = rand(T)
-                oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy)
-                # @test alpha * A * x + beta * y ≈ collect(dy)
-            end
-        end
-
-        @testset "sparse trmv" begin
-            @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
-                                              ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
-                    (transa == 'N') || continue
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+                @testset "uplo = $uplo" for uplo in ('L', 'U')
+                    (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                     A = sprand(T, 10, 10, 0.5)
+                    A = A + A'
                     x = rand(T, 10)
                     y = rand(T, 10)
 
-                    B = uplo == 'L' ? tril(A) : triu(A)
-                    B = diag == 'U' ? B - Diagonal(B) + I : B
-                    dA = oneSparseMatrixCSR(B)
+                    dA = uplo == 'L' ? SparseMatrix(A |> tril) : SparseMatrix(A |> triu)
                     dx = oneVector{T}(x)
                     dy = oneVector{T}(y)
 
                     alpha = rand(T)
                     beta = rand(T)
-
-                    oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA)
-                    oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy)
-                    @test alpha * wrapper(opa(A)) * x + beta * y ≈ collect(dy)
+                    oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy)
+                    @test alpha * A * x + beta * y ≈ collect(dy)
                 end
             end
         end
 
-        @testset "sparse trsv" begin
-            @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
-                                              ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
-                    (transa == 'N') || continue
-                    alpha = rand(T)
-                    A = rand(T, 10, 10) + I
-                    A = sparse(A)
-                    x = rand(T, 10)
-                    y = rand(T, 10)
+        @testset "sparse trmv" begin
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+                @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
+                    for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
+                                                  ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
+                        (transa == 'N') || continue
+                        (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                        A = sprand(T, 10, 10, 0.5)
+                        x = rand(T, 10)
+                        y = rand(T, 10)
 
-                    B = uplo == 'L' ? tril(A) : triu(A)
-                    B = diag == 'U' ? B - Diagonal(B) + I : B
-                    dA = oneSparseMatrixCSR(B)
-                    dx = oneVector{T}(x)
-                    dy = oneVector{T}(y)
+                        B = uplo == 'L' ? tril(A) : triu(A)
+                        B = diag == 'U' ? B - Diagonal(B) + I : B
+                        dA = SparseMatrix(B)
+                        dx = oneVector{T}(x)
+                        dy = oneVector{T}(y)
+
+                        alpha = rand(T)
+                        beta = rand(T)
 
-                    oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA)
-                    oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy)
-                    y = wrapper(opa(A)) \ (alpha * x)
-                    @test y ≈ collect(dy)
+                        oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA)
+                        oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy)
+                        @test alpha * wrapper(opa(A)) * x + beta * y ≈ collect(dy)
+                    end
                 end
             end
         end
 
-        @testset "sparse trsm" begin
-            @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                @testset "transx = $transx" for (transx, opx) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                    (transx != 'N') && continue
+        @testset "sparse trsv" begin
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+                @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
                     for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                   ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
                         (transa == 'N') || continue
+                        (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                         alpha = rand(T)
                         A = rand(T, 10, 10) + I
                         A = sparse(A)
-                        X = transx == 'N' ? rand(T, 10, 4) : rand(T, 4, 10)
-                        Y = rand(T, 10, 4)
+                        x = rand(T, 10)
+                        y = rand(T, 10)
 
                         B = uplo == 'L' ? tril(A) : triu(A)
                         B = diag == 'U' ? B - Diagonal(B) + I : B
-                        dA = oneSparseMatrixCSR(B)
-                        dX = oneMatrix{T}(X)
-                        dY = oneMatrix{T}(Y)
-
-                        oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA)
-                        oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY)
-                        Y = wrapper(opa(A)) \ (alpha * opx(X))
-                        @test Y ≈ collect(dY)
+                        dA = SparseMatrix(B)
+                        dx = oneVector{T}(x)
+                        dy = oneVector{T}(y)
+
+                        oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA)
+                        oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy)
+                        y = wrapper(opa(A)) \ (alpha * x)
+                        @test y ≈ collect(dy)
+                    end
+                end
+            end
+        end
 
-                        oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA)
+        @testset "sparse trsm" begin
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+                @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
+                    @testset "transx = $transx" for (transx, opx) in [('N', identity), ('T', transpose), ('C', adjoint)]
+                        (transx != 'N') && continue
+                        for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
+                                                      ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
+                            (transa == 'N') || continue
+                            (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                            alpha = rand(T)
+                            A = rand(T, 10, 10) + I
+                            A = sparse(A)
+                            X = transx == 'N' ? rand(T, 10, 4) : rand(T, 4, 10)
+                            Y = rand(T, 10, 4)
+
+                            B = uplo == 'L' ? tril(A) : triu(A)
+                            B = diag == 'U' ? B - Diagonal(B) + I : B
+                            dA = SparseMatrix(B)
+                            dX = oneMatrix{T}(X)
+                            dY = oneMatrix{T}(Y)
+
+                            oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA)
+                            oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY)
+                            Y = wrapper(opa(A)) \ (alpha * opx(X))
+                            @test Y ≈ collect(dY)
+
+                            oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA)
+                        end
                     end
                 end
             end

From 4e8b825bd0928ecf44138fc7efd902376997417e Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 15:29:56 -0600
Subject: [PATCH 2/8] Update test/onemkl.jl

---
 test/onemkl.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/onemkl.jl b/test/onemkl.jl
index bb3be88c..5f04135e 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -3,7 +3,7 @@ if Sys.iswindows()
 else
 
 using oneAPI
-using oneAPI.oneMKL: band, bandex, oneSparseMatrixCSR, oneSparseMatrixCOO
+using oneAPI.oneMKL: band, bandex, oneSparseMatrixCSR, oneSparseMatrixCOO, oneSparseMatrixCSC
 
 using SparseArrays
 using LinearAlgebra

From 6db7a178b3b1a35a1d8800348885f56bec577fd1 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 15:33:28 -0600
Subject: [PATCH 3/8] Update test/onemkl.jl

---
 lib/mkl/wrappers_sparse.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl
index e4458357..51dede37 100644
--- a/lib/mkl/wrappers_sparse.jl
+++ b/lib/mkl/wrappers_sparse.jl
@@ -439,7 +439,7 @@ function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparse
     return A
 end
 
-function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSR)
+function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSC)
     queue = global_queue(context(A.nzVal), device(A.nzVal))
     onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', flip_uplo(uplo), trans, diag, A.handle, nrhs)
     return A

From 0d63db14bec86627b408cf1fe7c6aa3773595dd1 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 16:02:55 -0600
Subject: [PATCH 4/8] Fix some tests

---
 test/onemkl.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/onemkl.jl b/test/onemkl.jl
index 5f04135e..7f374409 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -1112,7 +1112,7 @@ end
         @testset "sparse gemv" begin
             @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCOO, oneSparseMatrixCSR, oneSparseMatrixCSC)
                 @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
-                    (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                    (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                     A = sprand(T, 20, 10, 0.5)
                     x = transa == 'N' ? rand(T, 10) : rand(T, 20)
                     y = transa == 'N' ? rand(T, 20) : rand(T, 10)
@@ -1135,7 +1135,7 @@ end
                 @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
                     @testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)]
                         (transb == 'N') || continue
-                        (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                        (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                         A = sprand(T, 10, 10, 0.5)
                         B = transb == 'N' ? rand(T, 10, 2) : rand(T, 2, 10)
                         C = rand(T, 10, 2)
@@ -1157,7 +1157,7 @@ end
         @testset "sparse symv" begin
             @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
                 @testset "uplo = $uplo" for uplo in ('L', 'U')
-                    (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                    (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                     A = sprand(T, 10, 10, 0.5)
                     A = A + A'
                     x = rand(T, 10)
@@ -1181,7 +1181,7 @@ end
                     for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                   ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
                         (transa == 'N') || continue
-                        (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                        (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                         A = sprand(T, 10, 10, 0.5)
                         x = rand(T, 10)
                         y = rand(T, 10)
@@ -1209,7 +1209,7 @@ end
                     for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                   ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
                         (transa == 'N') || continue
-                        (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                        (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                         alpha = rand(T)
                         A = rand(T, 10, 10) + I
                         A = sparse(A)
@@ -1239,7 +1239,7 @@ end
                         for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                       ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
                             (transa == 'N') || continue
-                            (T isa Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
+                            (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                             alpha = rand(T)
                             A = rand(T, 10, 10) + I
                             A = sparse(A)

From d3d9471ce3c4f96520dc3d91743aa087238b116a Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 17:10:31 -0600
Subject: [PATCH 5/8] More fixes

---
 lib/mkl/wrappers_sparse.jl | 11 +++++------
 test/onemkl.jl             | 16 ++++++++++------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl
index 51dede37..9899848d 100644
--- a/lib/mkl/wrappers_sparse.jl
+++ b/lib/mkl/wrappers_sparse.jl
@@ -220,7 +220,7 @@ end
 
 function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSparseMatrixCSC)
     queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', filp_trans(trans), transB, A.handle, nrhs)
+    onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', flip_trans(trans), transB, A.handle, nrhs)
     return A
 end
 
@@ -243,7 +243,6 @@ for (fname, elty) in ((:onemklSsparse_symv, :Float32),
     end
 end
 
-
 for (fname, elty) in ((:onemklSsparse_symv, :Float32),
                       (:onemklDsparse_symv, :Float64))
     @eval begin
@@ -301,7 +300,7 @@ for (fname, elty) in ((:onemklSsparse_trmv, :Float32),
                               y::oneStridedVector{$elty})
 
             queue = global_queue(context(y), device())
-            $fname(sycl_queue(queue), flip_uplo(uplo), trans, diag, alpha, A.handle, x, beta, y)
+            $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y)
             y
         end
     end
@@ -309,7 +308,7 @@ end
 
 function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
     queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_trmv(sycl_queue(queue), flip_uplo(uplo), trans, diag, A.handle)
+    onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle)
     return A
 end
 
@@ -351,7 +350,7 @@ for (fname, elty) in ((:onemklSsparse_trsv, :Float32),
                               y::oneStridedVector{$elty})
 
             queue = global_queue(context(y), device())
-            $fname(sycl_queue(queue), filp_uplo(uplo), trans, diag, alpha, A.handle, x, y)
+            $fname(sycl_queue(queue), flip_uplo(uplo), trans, diag, alpha, A.handle, x, y)
             y
         end
     end
@@ -435,7 +434,7 @@ end
 
 function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
     queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_trsm(sycl_queue(queue), filp_uplo(uplo), trans, diag, A.handle)
+    onemklXsparse_optimize_trsm(sycl_queue(queue), flip_uplo(uplo), trans, diag, A.handle)
     return A
 end
 
diff --git a/test/onemkl.jl b/test/onemkl.jl
index 7f374409..53b47e09 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -1078,6 +1078,8 @@ end
 
 @testset "SPARSE" begin
     @testset "$T" for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64])
+        ε = sqrt(eps(real(T)))
+
         @testset "oneSparseMatrixCSR" begin
             for S in (Int32, Int64)
                 A = sprand(T, 20, 10, 0.5)
@@ -1125,7 +1127,7 @@ end
                     beta = rand(T)
                     oneMKL.sparse_optimize_gemv!(transa, dA)
                     oneMKL.sparse_gemv!(transa, alpha, dA, dx, beta, dy)
-                    @test alpha * opa(A) * x + beta * y ≈ collect(dy)
+                    @test isapprox(alpha * opa(A) * x + beta * y, collect(dy), atol=ε)
                 end
             end
         end
@@ -1148,7 +1150,9 @@ end
                         beta = rand(T)
                         oneMKL.sparse_optimize_gemm!(transa, dA)
                         oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC)
-                        @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC)
+                        @test isapprox(alpha * opa(A) * opb(B) + beta * C, collect(dC), atol=ε)
+
+                        oneMKL.sparse_optimize_gemm!(transa, transb, 2, dA)
                     end
                 end
             end
@@ -1170,7 +1174,7 @@ end
                     alpha = rand(T)
                     beta = rand(T)
                     oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy)
-                    @test alpha * A * x + beta * y ≈ collect(dy)
+                    @test isapprox(alpha * A * x + beta * y, collect(dy), atol=ε)
                 end
             end
         end
@@ -1197,7 +1201,7 @@ end
 
                         oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA)
                         oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy)
-                        @test alpha * wrapper(opa(A)) * x + beta * y ≈ collect(dy)
+                        @test isapprox(alpha * wrapper(opa(A)) * x + beta * y, collect(dy), atol=ε)
                     end
                 end
             end
@@ -1225,7 +1229,7 @@ end
                         oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA)
                         oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy)
                         y = wrapper(opa(A)) \ (alpha * x)
-                        @test y ≈ collect(dy)
+                        @test isapprox(y, collect(dy), atol=ε)
                     end
                 end
             end
@@ -1255,7 +1259,7 @@ end
                             oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA)
                             oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY)
                             Y = wrapper(opa(A)) \ (alpha * opx(X))
-                            @test Y ≈ collect(dY)
+                            @test isapprox(Y, collect(dY), atol=ε)
 
                             oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA)
                         end

From f2ff379f2be5838ddd0b63d6d21b55af4f6bf05b Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 21:47:03 -0600
Subject: [PATCH 6/8] Remove routines that we can't support for
 oneSparseMatrixCSC

---
 lib/mkl/wrappers_sparse.jl | 51 ++++++++++++++++++++------------------
 test/onemkl.jl             |  3 +--
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl
index 9899848d..8315482f 100644
--- a/lib/mkl/wrappers_sparse.jl
+++ b/lib/mkl/wrappers_sparse.jl
@@ -287,30 +287,33 @@ function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparse
     return A
 end
 
-for (fname, elty) in ((:onemklSsparse_trmv, :Float32),
-                      (:onemklDsparse_trmv, :Float64))
-    @eval begin
-        function sparse_trmv!(uplo::Char,
-                              trans::Char,
-                              diag::Char,
-                              alpha::Number,
-                              A::oneSparseMatrixCSC{$elty},
-                              x::oneStridedVector{$elty},
-                              beta::Number,
-                              y::oneStridedVector{$elty})
-
-            queue = global_queue(context(y), device())
-            $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y)
-            y
-        end
-    end
-end
-
-function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
-    queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle)
-    return A
-end
+# Only trans = 'N' is supported with oneSparseMatrixCSR.
+# We can't use any trick to support sparse "trmv" for oneSparseMatrixCSC.
+#
+# for (fname, elty) in ((:onemklSsparse_trmv, :Float32),
+#                       (:onemklDsparse_trmv, :Float64))
+#     @eval begin
+#         function sparse_trmv!(uplo::Char,
+#                               trans::Char,
+#                               diag::Char,
+#                               alpha::Number,
+#                               A::oneSparseMatrixCSC{$elty},
+#                               x::oneStridedVector{$elty},
+#                               beta::Number,
+#                               y::oneStridedVector{$elty})
+
+#             queue = global_queue(context(y), device())
+#             $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y)
+#             y
+#         end
+#     end
+# end
+
+# function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
+#     queue = global_queue(context(A.nzVal), device(A.nzVal))
+#     onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle)
+#     return A
+# end
 
 for (fname, elty) in ((:onemklSsparse_trsv, :Float32),
                       (:onemklDsparse_trsv, :Float64),
diff --git a/test/onemkl.jl b/test/onemkl.jl
index 53b47e09..ee56f748 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -1180,12 +1180,11 @@ end
         end
 
         @testset "sparse trmv" begin
-            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR,)
                 @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
                     for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                   ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
                         (transa == 'N') || continue
-                        (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                         A = sprand(T, 10, 10, 0.5)
                         x = rand(T, 10)
                         y = rand(T, 10)

From 9bf4de6c0a3190fe7c7d134ca4a59afafa7093d5 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 22:13:27 -0600
Subject: [PATCH 7/8] More fixes...

---
 lib/mkl/interfaces.jl      |   8 ---
 lib/mkl/wrappers_sparse.jl | 134 +++++++++++++++++++------------------
 test/onemkl.jl             |   7 +-
 3 files changed, 74 insertions(+), 75 deletions(-)

diff --git a/lib/mkl/interfaces.jl b/lib/mkl/interfaces.jl
index a4e5f83f..343131d9 100644
--- a/lib/mkl/interfaces.jl
+++ b/lib/mkl/interfaces.jl
@@ -28,14 +28,6 @@ function LinearAlgebra.generic_trimatdiv!(C::oneVector{T}, uploc, isunitc, tfun:
     sparse_trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
 end
 
-function LinearAlgebra.generic_trimatdiv!(C::oneVector{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSC{T}, B::oneVector{T}) where T <: BlasReal
-    sparse_trsv!(flip_uplo(uploc), tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
-end
-
 function LinearAlgebra.generic_trimatdiv!(C::oneMatrix{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSR{T}, B::oneMatrix{T}) where T <: BlasFloat
     sparse_trsm!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', isunitc, one(T), A, B, C)
 end
-
-function LinearAlgebra.generic_trimatdiv!(C::oneMatrix{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSC{T}, B::oneMatrix{T}) where T <: BlasReal
-    sparse_trsm!(flip_uplo(uploc), tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', isunitc, one(T), A, B, C)
-end
diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl
index 8315482f..360c00b7 100644
--- a/lib/mkl/wrappers_sparse.jl
+++ b/lib/mkl/wrappers_sparse.jl
@@ -301,14 +301,14 @@ end
 #                               x::oneStridedVector{$elty},
 #                               beta::Number,
 #                               y::oneStridedVector{$elty})
-
+#
 #             queue = global_queue(context(y), device())
 #             $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y)
 #             y
 #         end
 #     end
 # end
-
+#
 # function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
 #     queue = global_queue(context(A.nzVal), device(A.nzVal))
 #     onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle)
@@ -341,29 +341,32 @@ function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparse
     return A
 end
 
-for (fname, elty) in ((:onemklSsparse_trsv, :Float32),
-                      (:onemklDsparse_trsv, :Float64))
-    @eval begin
-        function sparse_trsv!(uplo::Char,
-                              trans::Char,
-                              diag::Char,
-                              alpha::Number,
-                              A::oneSparseMatrixCSC{$elty},
-                              x::oneStridedVector{$elty},
-                              y::oneStridedVector{$elty})
-
-            queue = global_queue(context(y), device())
-            $fname(sycl_queue(queue), flip_uplo(uplo), trans, diag, alpha, A.handle, x, y)
-            y
-        end
-    end
-end
-
-function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
-    queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_trsv(sycl_queue(queue), flip_uplo(uplo), trans, diag, A.handle)
-    return A
-end
+# Only trans = 'N' is supported with oneSparseMatrixCSR.
+# We can't use any trick to support sparse "trsv" for oneSparseMatrixCSC.
+#
+# for (fname, elty) in ((:onemklSsparse_trsv, :Float32),
+#                       (:onemklDsparse_trsv, :Float64))
+#     @eval begin
+#         function sparse_trsv!(uplo::Char,
+#                               trans::Char,
+#                               diag::Char,
+#                               alpha::Number,
+#                               A::oneSparseMatrixCSC{$elty},
+#                               x::oneStridedVector{$elty},
+#                               y::oneStridedVector{$elty})
+#
+#             queue = global_queue(context(y), device())
+#             $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, y)
+#             y
+#         end
+#     end
+# end
+#
+# function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
+#     queue = global_queue(context(A.nzVal), device(A.nzVal))
+#     onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle)
+#     return A
+# end
 
 for (fname, elty) in ((:onemklSsparse_trsm, :Float32),
                       (:onemklDsparse_trsm, :Float64),
@@ -407,42 +410,45 @@ function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A
     return A
 end
 
-for (fname, elty) in ((:onemklSsparse_trsm, :Float32),
-                      (:onemklDsparse_trsm, :Float64))
-    @eval begin
-        function sparse_trsm!(uplo::Char,
-                              transA::Char,
-                              transX::Char,
-                              diag::Char,
-                              alpha::Number,
-                              A::oneSparseMatrixCSC{$elty},
-                              X::oneStridedMatrix{$elty},
-                              Y::oneStridedMatrix{$elty})
-
-            mX, nX = size(X)
-            mY, nY = size(Y)
-            (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows."))
-            (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns."))
-            (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows."))
-            (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns."))
-            nrhs = size(X, 2)
-            ldx = max(1,stride(X,2))
-            ldy = max(1,stride(Y,2))
-            queue = global_queue(context(Y), device())
-            $fname(sycl_queue(queue), 'C', transA, transX, flip_uplo(uplo), diag, alpha, A.handle, X, nrhs, ldx, Y, ldy)
-            Y
-        end
-    end
-end
-
-function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
-    queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_trsm(sycl_queue(queue), flip_uplo(uplo), trans, diag, A.handle)
-    return A
-end
-
-function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSC)
-    queue = global_queue(context(A.nzVal), device(A.nzVal))
-    onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', flip_uplo(uplo), trans, diag, A.handle, nrhs)
-    return A
-end
+# Only transA = 'N' is supported with oneSparseMatrixCSR.
+# We can't use any trick to support sparse "trsm" for oneSparseMatrixCSC.
+#
+# for (fname, elty) in ((:onemklSsparse_trsm, :Float32),
+#                       (:onemklDsparse_trsm, :Float64))
+#     @eval begin
+#         function sparse_trsm!(uplo::Char,
+#                               transA::Char,
+#                               transX::Char,
+#                               diag::Char,
+#                               alpha::Number,
+#                               A::oneSparseMatrixCSC{$elty},
+#                               X::oneStridedMatrix{$elty},
+#                               Y::oneStridedMatrix{$elty})
+#
+#             mX, nX = size(X)
+#             mY, nY = size(Y)
+#             (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows."))
+#             (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns."))
+#             (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows."))
+#             (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns."))
+#             nrhs = size(X, 2)
+#             ldx = max(1,stride(X,2))
+#             ldy = max(1,stride(Y,2))
+#             queue = global_queue(context(Y), device())
+#             $fname(sycl_queue(queue), 'C', flip_trans(transA), transX, uplo, diag, alpha, A.handle, X, nrhs, ldx, Y, ldy)
+#             Y
+#         end
+#     end
+# end
+#
+# function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC)
+#     queue = global_queue(context(A.nzVal), device(A.nzVal))
+#     onemklXsparse_optimize_trsm(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle)
+#     return A
+# end
+#
+# function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSC)
+#     queue = global_queue(context(A.nzVal), device(A.nzVal))
+#     onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', uplo, flip_trans(trans), diag, A.handle, nrhs)
+#     return A
+# end
diff --git a/test/onemkl.jl b/test/onemkl.jl
index ee56f748..5e7ef0da 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -1174,7 +1174,7 @@ end
                     alpha = rand(T)
                     beta = rand(T)
                     oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy)
-                    @test isapprox(alpha * A * x + beta * y, collect(dy), atol=ε)
+                    @test_broken isapprox(alpha * A * x + beta * y, collect(dy), atol=ε)
                 end
             end
         end
@@ -1185,6 +1185,7 @@ end
                     for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                   ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
                         (transa == 'N') || continue
+                        (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                         A = sprand(T, 10, 10, 0.5)
                         x = rand(T, 10)
                         y = rand(T, 10)
@@ -1207,7 +1208,7 @@ end
         end
 
         @testset "sparse trsv" begin
-            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR,)
                 @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
                     for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular),
                                                   ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)]
@@ -1235,7 +1236,7 @@ end
         end
 
         @testset "sparse trsm" begin
-            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
+            @testset  "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR,)
                 @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
                     @testset "transx = $transx" for (transx, opx) in [('N', identity), ('T', transpose), ('C', adjoint)]
                         (transx != 'N') && continue

From a8bbba20b9f7fc5c58095c0a950cd5a564f72ae2 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 16 Sep 2025 22:28:14 -0600
Subject: [PATCH 8/8] More fixes...

---
 test/onemkl.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/onemkl.jl b/test/onemkl.jl
index 5e7ef0da..95c0ed3f 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -1163,7 +1163,7 @@ end
                 @testset "uplo = $uplo" for uplo in ('L', 'U')
                     (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue
                     A = sprand(T, 10, 10, 0.5)
-                    A = A + A'
+                    A = A + transpose(A)
                     x = rand(T, 10)
                     y = rand(T, 10)
 
@@ -1174,7 +1174,7 @@ end
                     alpha = rand(T)
                     beta = rand(T)
                     oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy)
-                    @test_broken isapprox(alpha * A * x + beta * y, collect(dy), atol=ε)
+                    @test isapprox(alpha * A * x + beta * y, collect(dy), atol=ε)
                 end
             end
         end