This repository was archived by the owner on Aug 22, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 44
This repository was archived by the owner on Aug 22, 2025. It is now read-only.
out of place Jacobian decomposition mutates #108
Copy link
Copy link
Open
Description
Performance results are highly variable when using cached out-of-place methods. I have also gotten segfaults, although I cannot reliably reproduce that part of the issue. Using CuArrays seems to amplify the problem.
using Revise
using Flux, BenchmarkTools, CuArrays, CUDAnative, ForwardDiff, LinearAlgebra, Random
"""
    mwe(N, ::Type{T}=Float32) where T<:Real

Reproducer comparing in-place and out-of-place cached calls to
`SparseDiffTools.forwarddiff_color_jacobian!` / `forwarddiff_color_jacobian`
on a CPU matrix and on its GPU copy.

A random `N`×`N` matrix of element type `T` is created, moved to the GPU with
`gpu`, and four scenarios are run in order: CPU in-place, CPU out-of-place,
GPU in-place, GPU out-of-place. For `N < 5` each resulting Jacobian is logged
with `@info`; for `N >= 5` each call is benchmarked with `@btime` instead.

# Arguments
- `N`: side length of the square input matrix; the Jacobian buffer is
  `N^2`×`N^2`.
- `T`: element type of the CPU matrix (defaults to `Float32`).

# Returns
The value of the last expression: `false` when `N < 5`, otherwise the result
of the final `@btime` expression.
"""
function mwe(N, ::Type{T}=Float32) where T<:Real
    # NOTE(review): `SparseDiffTools` is referenced by qualified name below but
    # is not listed in the `using` line visible above this function; it must be
    # loaded before calling `mwe` -- confirm.
    A::Matrix{T} = rand(T, N, N)
    cuA = A |> gpu

    # Scalar kernel used by the GPU methods so the broadcast is a single call.
    krn(x) = x + x*x + 1f0

    # In-place test function: generic broadcast, plus a CuMatrix{Float32} method.
    function f!(out, A)
        out .= A .+ A .* A .+ 1f0
    end
    function f!(out, A::CuMatrix{Float32})
        out .= krn.(A)
    end

    # Out-of-place test function with the same two methods.
    function f(A)
        return A .+ A .* A .+ 1f0
    end
    function f(A::CuMatrix{Float32})
        return krn.(A)
    end

    # Dense Jacobian buffer handed to the in-place call.
    J = rand(T, N^2, N^2)

    # The original guards were `N<5` / `N>5`, so `N == 5` neither printed nor
    # benchmarked anything. Benchmarking now covers `N >= 5`.

    @info "test cpu (inplace)"
    cache = SparseDiffTools.ForwardColorJacCache(f!, A, dx = similar(A))
    SparseDiffTools.forwarddiff_color_jacobian!(J, f!, A, cache)
    (N < 5) && @info "test ∇f cpu inplace: $(J)"
    (N >= 5) && @btime SparseDiffTools.forwarddiff_color_jacobian!($J, $f!, $A, $cache)

    @info "test cpu (out of place)"
    cacheoos = SparseDiffTools.ForwardColorJacCache(f, A, dx = similar(A))
    J = SparseDiffTools.forwarddiff_color_jacobian(f, A, cacheoos)
    (N < 5) && @info "test ∇f cpu oop: $(J)"
    (N >= 5) && @btime SparseDiffTools.forwarddiff_color_jacobian($f, $A, $cacheoos)

    @info "test gpu (inplace)"
    # The GPU buffer is seeded from the CPU out-of-place result computed above.
    cuJ = J |> gpu
    cucache = SparseDiffTools.ForwardColorJacCache(f!, cuA, dx = similar(cuA))
    SparseDiffTools.forwarddiff_color_jacobian!(cuJ, f!, cuA, cucache)
    (N < 5) && @info "test ∇f gpu inplace: $(cuJ)"
    (N >= 5) && @btime SparseDiffTools.forwarddiff_color_jacobian!($cuJ, $f!, $cuA, $cucache)

    @info "test gpu (outofplace)"
    cucacheoop = SparseDiffTools.ForwardColorJacCache(f, cuA, dx = similar(cuA))
    cuJ = SparseDiffTools.forwarddiff_color_jacobian(f, cuA, cucacheoop)
    (N < 5) && @info "test ∇f gpu oop: $(cuJ)"
    (N >= 5) && @btime SparseDiffTools.forwarddiff_color_jacobian($f, $cuA, $cucacheoop)
end
mwe(12)
Output:
[ Info: test cpu (inplace)
46.500 μs (8 allocations: 320 bytes)
[ Info: test cpu (out of place)
181.946 ms (80271 allocations: 853.10 MiB)
[ Info: test gpu (inplace)
12.860 ms (10965 allocations: 402.14 KiB)
[ Info: test gpu (outofplace)
3.110 s (3516919 allocations: 122.65 MiB)
Metadata
Metadata
Assignees
Labels
No labels