-
-
Notifications
You must be signed in to change notification settings - Fork 70
Description
Solving ODE using ArrayPartition of GPUArrays fails for implicit solvers -- but not explicit solvers (or at least Tsit5) -- possibly because of the zeromatrix definition using Vector, which is called from build_J_W for implicit solvers.
I'm using this MWE with the Lorenz example, which obviously does not need a stiff solver, but my goal is to use this setup to solve a stiff chemical system whose input data is already stored on the GPU as a NamedTuple of 3-dimensional CuArrays.
"""
    lorenz!(du, u, p, t)

Evaluate the Lorenz right-hand side in place, elementwise over the partitions
of `u`.

# Arguments
- `du`: output container; `du.x[1]`, `du.x[2]`, `du.x[3]` are overwritten with
  the three derivative components.
- `u`: state container; `u.x[1]`, `u.x[2]`, `u.x[3]` hold the three state
  components.
- `p`: parameter container; `p.x[1]`, `p.x[2]`, `p.x[3]` hold `σ`, `ρ`, `β`.
- `t`: time (unused).

# Returns
`nothing`; the result is written into `du`.
"""
function lorenz!(du, u, p, t)
    u1 = u.x[1]
    u2 = u.x[2]
    u3 = u.x[3]
    σ = p.x[1]
    ρ = p.x[2]
    β = p.x[3]
    du1 = du.x[1]
    du2 = du.x[2]
    du3 = du.x[3]
    # `@.` turns every `=` below into `.=`, so the partitions of `du` are
    # updated in place by fused broadcasts rather than rebinding the locals.
    @. begin
        du1 = σ * (u2 - u1)
        du2 = u1 * (ρ - u3) - u2
        du3 = u1 * u2 - β * u3
    end
    return nothing
end
# Scalar initial condition, time span, and parameters; these only serve as
# scale factors for the random GPU fields built below.
u0 = [1.0f0; 0.0f0; 0.0f0]
tspan = (0.0f0, 10.0f0)
p = [10.0f0, 28.0f0, 8 / 3.0f0]

# Grid dimensions: `lorenz!` broadcasts elementwise, so each grid point is an
# independent Lorenz system.
nx, ny, nz = (32, 32, 32)

# State components as separate 3-D GPU arrays. Note that u0[2] and u0[3] are
# zero, so Y and Z start out as all-zero arrays.
X = CUDA.rand(Float32, nx, ny, nz) .* u0[1];
Y = CUDA.rand(Float32, nx, ny, nz) .* u0[2];
Z = CUDA.rand(Float32, nx, ny, nz) .* u0[3];
U0 = RecursiveArrayTools.ArrayPartition((X, Y, Z));

# Spatially varying parameters, also stored on the GPU.
σ = CUDA.rand(Float32, nx, ny, nz) .* p[1];
ρ = CUDA.rand(Float32, nx, ny, nz) .* p[2];
β = CUDA.rand(Float32, nx, ny, nz) .* p[3];
P = RecursiveArrayTools.ArrayPartition((σ, ρ, β));

prob = ODEProblem(lorenz!, U0, tspan, P);
# This works
solve(prob, Tsit5(); save_everystep = false, reltol = 1f-6, abstol = 1f-10)
# Any implicit method fails, apparently because of the call to
# ArrayInterface.zeromatrix(A::ArrayPartition) somewhere along the way.
solve(prob, Kvaerno5(); save_everystep = false, reltol = 1f-6, abstol = 1f-10)
Error & Stacktrace
Here is the CUDA Stacktrace using a single GPU on NCAR's Derecho machine (so a somewhat complicated hardware/OS environment)
ERROR: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore should be avoided.
If you want to allow scalar iteration, use `allowscalar` or `@allowscalar`
to enable scalar iteration globally or for the operations in question.
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] errorscalar(op::String)
@ GPUArraysCore /glade/work/ctowery/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:151
[3] _assertscalar(op::String, behavior::GPUArraysCore.ScalarIndexing)
@ GPUArraysCore /glade/work/ctowery/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:124
[4] assertscalar(op::String)
@ GPUArraysCore /glade/work/ctowery/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:112
[5] getindex
@ /glade/work/ctowery/.julia/packages/GPUArrays/w335n/src/host/indexing.jl:50 [inlined]
[6] _broadcast_getindex
@ ./broadcast.jl:675 [inlined]
[7] _getindex
@ ./broadcast.jl:706 [inlined]
[8] _broadcast_getindex
@ ./broadcast.jl:681 [inlined]
[9] getindex
@ ./broadcast.jl:636 [inlined]
[10] cpu_broadcast_kernel_linear
@ /glade/work/ctowery/.julia/packages/KernelAbstractions/X5fk1/src/macros.jl:314 [inlined]
[11] (::GPUArrays.var"#cpu_broadcast_kernel_linear#38")(__ctx__::KernelAbstractions.CompilerMetadata{…}, dest::SubArray{…}, bc::Base.Broadcast.Broadcasted{…})
@ GPUArrays ./none:0
[12] __thread_run(tid::Int64, len::Int64, rem::Int64, obj::KernelAbstractions.Kernel{…}, ndrange::Tuple{…}, iterspace::KernelAbstractions.NDIteration.NDRange{…}, args::Tuple{…}, dynamic::KernelAbstractions.NDIteration.NoDynamicCheck)
@ KernelAbstractions /glade/work/ctowery/.julia/packages/KernelAbstractions/X5fk1/src/cpu.jl:145
[13] __run(obj::KernelAbstractions.Kernel{…}, ndrange::Tuple{…}, iterspace::KernelAbstractions.NDIteration.NDRange{…}, args::Tuple{…}, dynamic::KernelAbstractions.NDIteration.NoDynamicCheck, static_threads::Bool)
@ KernelAbstractions /glade/work/ctowery/.julia/packages/KernelAbstractions/X5fk1/src/cpu.jl:112
[14] (::KernelAbstractions.Kernel{…})(::SubArray{…}, ::Vararg{…}; ndrange::Tuple{…}, workgroupsize::Nothing)
@ KernelAbstractions /glade/work/ctowery/.julia/packages/KernelAbstractions/X5fk1/src/cpu.jl:46
[15] Kernel
@ /glade/work/ctowery/.julia/packages/KernelAbstractions/X5fk1/src/cpu.jl:39 [inlined]
[16] _copyto!
@ /glade/work/ctowery/.julia/packages/GPUArrays/w335n/src/host/broadcast.jl:71 [inlined]
[17] materialize!
@ /glade/work/ctowery/.julia/packages/GPUArrays/w335n/src/host/broadcast.jl:38 [inlined]
[18] materialize!
@ ./broadcast.jl:911 [inlined]
[19] copyto!(dest::Vector{Float32}, A::ArrayPartition{Float32, Tuple{CuArray{…}, CuArray{…}, CuArray{…}}})
@ RecursiveArrayTools /glade/work/ctowery/.julia/packages/RecursiveArrayTools/cqMRn/src/array_partition.jl:193
[20] copyto_axcheck!
@ ./abstractarray.jl:1176 [inlined]
[21] Vector{Float32}(x::ArrayPartition{Float32, Tuple{CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}}})
@ Base ./array.jl:673
[22] Array
@ ./boot.jl:498 [inlined]
[23] zeromatrix(A::ArrayPartition{Float32, Tuple{CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}, CuArray{Float32, 3, CUDA.DeviceMemory}}})
@ RecursiveArrayTools /glade/work/ctowery/.julia/packages/RecursiveArrayTools/cqMRn/src/array_partition.jl:445
[24] build_J_W
@ /glade/work/ctowery/.julia/packages/OrdinaryDiffEqDifferentiation/TKWRC/src/derivative_utils.jl:845 [inlined]
[25] build_nlsolver(alg::Kvaerno5{…}, nlalg::NLNewton{…}, u::ArrayPartition{…}, uprev::ArrayPartition{…}, p::ArrayPartition{…}, t::Float32, dt::Float32, f::ODEFunction{…}, rate_prototype::ArrayPartition{…}, ::Type{…}, ::Type{…}, ::Type{…}, γ::Float32, c::Float32, α::Int64, ::Val{…})
@ OrdinaryDiffEqNonlinearSolve /glade/work/ctowery/.julia/packages/OrdinaryDiffEqNonlinearSolve/EABtI/src/utils.jl:213
[26] build_nlsolver
@ /glade/work/ctowery/.julia/packages/OrdinaryDiffEqNonlinearSolve/EABtI/src/utils.jl:152 [inlined]
[27] build_nlsolver
@ /glade/work/ctowery/.julia/packages/OrdinaryDiffEqNonlinearSolve/EABtI/src/utils.jl:142 [inlined]
[28] alg_cache(alg::Kvaerno5{…}, u::ArrayPartition{…}, rate_prototype::ArrayPartition{…}, ::Type{…}, ::Type{…}, ::Type{…}, uprev::ArrayPartition{…}, uprev2::ArrayPartition{…}, f::ODEFunction{…}, t::Float32, dt::Float32, reltol::Float32, p::ArrayPartition{…}, calck::Bool, ::Val{…})
@ OrdinaryDiffEqSDIRK /glade/work/ctowery/.julia/packages/OrdinaryDiffEqSDIRK/Dkxij/src/kencarp_kvaerno_caches.jl:362
[29] __init(prob::ODEProblem{…}, alg::Kvaerno5{…}, timeseries_init::Tuple{}, ts_init::Tuple{}, ks_init::Tuple{}; saveat::Tuple{}, tstops::Tuple{}, d_discontinuities::Tuple{}, save_idxs::Nothing, save_everystep::Bool, save_on::Bool, save_discretes::Bool, save_start::Bool, save_end::Nothing, callback::Nothing, dense::Bool, calck::Bool, dt::Float32, dtmin::Float32, dtmax::Float32, force_dtmin::Bool, adaptive::Bool, gamma::Rational{…}, abstol::Float32, reltol::Float32, qmin::Rational{…}, qmax::Int64, qsteady_min::Int64, qsteady_max::Rational{…}, beta1::Nothing, beta2::Nothing, qoldinit::Rational{…}, controller::Nothing, fullnormalize::Bool, failfactor::Int64, maxiters::Int64, internalnorm::typeof(DiffEqBase.ODE_DEFAULT_NORM), internalopnorm::typeof(LinearAlgebra.opnorm), isoutofdomain::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), unstable_check::typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK), verbose::Bool, timeseries_errors::Bool, dense_errors::Bool, advance_to_tstop::Bool, stop_at_next_tstop::Bool, initialize_save::Bool, progress::Bool, progress_steps::Int64, progress_name::String, progress_message::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), progress_id::Symbol, userdata::Nothing, allow_extrapolation::Bool, initialize_integrator::Bool, alias::ODEAliasSpecifier, initializealg::DiffEqBase.DefaultInit, kwargs::@Kwargs{})
@ OrdinaryDiffEqCore /glade/work/ctowery/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:410
[30] __init (repeats 2 times)
@ /glade/work/ctowery/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:11 [inlined]
[31] __solve(::ODEProblem{…}, ::Kvaerno5{…}; kwargs::@Kwargs{…})
@ OrdinaryDiffEqCore /glade/work/ctowery/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:6
[32] __solve
@ /glade/work/ctowery/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:1 [inlined]
[33] solve_call(_prob::ODEProblem{…}, args::Kvaerno5{…}; merge_callbacks::Bool, kwargshandle::Nothing, kwargs::@Kwargs{…})
@ DiffEqBase /glade/work/ctowery/.julia/packages/DiffEqBase/aB45d/src/solve.jl:127
[34] solve_call
@ /glade/work/ctowery/.julia/packages/DiffEqBase/aB45d/src/solve.jl:84 [inlined]
[35] #solve_up#39
@ /glade/work/ctowery/.julia/packages/DiffEqBase/aB45d/src/solve.jl:563 [inlined]
[36] solve_up
@ /glade/work/ctowery/.julia/packages/DiffEqBase/aB45d/src/solve.jl:540 [inlined]
[37] #solve#38
@ /glade/work/ctowery/.julia/packages/DiffEqBase/aB45d/src/solve.jl:530 [inlined]
[38] top-level scope
@ REPL[19]:1
Environment (please complete the following information):
- Output of
using Pkg; Pkg.status()
Status `~/DiffEq/Project.toml`
[052768ef] CUDA v5.9.3
⌅ [f68482b8] Cthulhu v2.16.6
[2b5f629d] DiffEqBase v6.191.0
[071ae1c0] DiffEqGPU v3.9.0
[1dea7af3] OrdinaryDiffEq v6.103.0
[731186ca] RecursiveArrayTools v3.39.0
[90137ffa] StaticArrays v1.9.15
- Output of
using Pkg; Pkg.status(; mode = PKGMODE_MANIFEST)
Status `~/DiffEq/Manifest.toml`
[47edcb42] ADTypes v1.18.0
[621f4979] AbstractFFTs v1.5.0
[1520ce14] AbstractTrees v0.4.5
[7d9f7c33] Accessors v0.1.42
[79e6a3ab] Adapt v4.4.0
[4fba245c] ArrayInterface v7.22.0
[4c555306] ArrayLayouts v1.12.0
[a9b6321e] Atomix v1.1.2
[ab4f0b2a] BFloat16s v0.6.0
[62783981] BitTwiddlingConvenienceFunctions v0.1.6
[70df07ce] BracketingNonlinearSolve v1.6.0
[fa961155] CEnum v0.5.0
[2a0fbf3d] CPUSummary v0.2.7
[052768ef] CUDA v5.9.3
[1af6417a] CUDA_Runtime_Discovery v1.0.0
[d360d2e6] ChainRulesCore v1.26.0
[fb6a15b2] CloseOpenIntervals v0.1.13
[da1fd8a2] CodeTracking v2.0.2
[3da002f7] ColorTypes v0.12.1
[5ae59095] Colors v0.13.1
[38540f10] CommonSolve v0.2.4
[bbf7d656] CommonSubexpressions v0.3.1
[f70d9fcc] CommonWorldInvalidations v1.0.0
[34da2185] Compat v4.18.1
[a33af91c] CompositionsBase v0.1.2
[2569d6c7] ConcreteStructs v0.2.3
[187b0558] ConstructionBase v1.6.0
[adafc99b] CpuId v0.3.1
[a8cc5b0e] Crayons v4.1.1
⌅ [f68482b8] Cthulhu v2.16.6
[9a962f9c] DataAPI v1.16.0
[a93c6f00] DataFrames v1.8.1
[864edb3b] DataStructures v0.19.3
[e2d170a0] DataValueInterfaces v1.0.0
[2b5f629d] DiffEqBase v6.191.0
[071ae1c0] DiffEqGPU v3.9.0
[163ba53b] DiffResults v1.1.0
[b552c78f] DiffRules v1.15.1
[a0c0ee7d] DifferentiationInterface v0.7.11
[ffbed154] DocStringExtensions v0.9.5
[4e289a0a] EnumX v1.0.5
[f151be2c] EnzymeCore v0.8.16
[d4d017d3] ExponentialUtilities v1.27.0
[e2ba6199] ExprTools v0.1.10
[55351af7] ExproniconLite v0.10.14
[7034ab61] FastBroadcast v0.3.5
[9aa1b823] FastClosures v0.3.2
[442a2c76] FastGaussQuadrature v1.1.0
[a4df4552] FastPower v1.2.0
[1a297f60] FillArrays v1.15.0
[6a86dc24] FiniteDiff v2.29.0
[53c48c17] FixedPointNumbers v0.8.5
[1eca21be] FoldingTrees v1.2.2
⌃ [f6369f11] ForwardDiff v1.2.2
[069b7b12] FunctionWrappers v1.1.3
[77dc65aa] FunctionWrappersWrappers v0.1.3
[0c68f7d7] GPUArrays v11.2.6
[46192b85] GPUArraysCore v0.2.0
⌃ [61eb1bfa] GPUCompiler v1.7.2
[096a3bc2] GPUToolbox v1.0.0
[c145ed77] GenericSchur v0.5.6
[076d061b] HashArrayMappedTries v0.2.0
[615f187c] IfElse v0.1.1
[842dd82b] InlineStrings v1.4.5
[3587e190] InverseFunctions v0.1.17
[41ab1584] InvertedIndices v1.3.1
[92d709cd] IrrationalConstants v0.2.6
[82899510] IteratorInterfaceExtensions v1.0.0
[692b3bcd] JLLWrappers v1.7.1
[ae98c720] Jieko v0.2.1
⌅ [70703baa] JuliaSyntax v0.4.10
[63c18a36] KernelAbstractions v0.9.39
[ba0b0d4f] Krylov v0.10.2
[929cbde3] LLVM v9.4.4
[8b046642] LLVMLoopInfo v1.0.0
[b964fa9f] LaTeXStrings v1.4.0
[10f19ff3] LayoutPointers v0.1.17
[5078a376] LazyArrays v2.9.0
[87fe0de2] LineSearch v0.1.4
[d3d80556] LineSearches v7.4.0
[7ed4a6bd] LinearSolve v3.46.1
[2ab3a3ac] LogExpFunctions v0.3.29
[e6f89c97] LoggingExtras v1.2.0
[1914dd2f] MacroTools v0.5.16
[d125e4d3] ManualMemory v0.1.8
[bb5d69b7] MaybeInplace v0.1.4
[e1d29d7a] Missings v1.2.0
[2e0e35c7] Moshi v0.3.7
[46d2c3a1] MuladdMacro v0.2.4
[d41bc354] NLSolversBase v7.10.0
[5da4648a] NVTX v1.0.1
[77ba4419] NaNMath v1.1.3
[8913a72c] NonlinearSolve v4.12.0
[be0214bd] NonlinearSolveBase v2.2.0
[5959db7a] NonlinearSolveFirstOrder v1.10.0
[9a2c21bd] NonlinearSolveQuasiNewton v1.11.0
[26075421] NonlinearSolveSpectralMethods v1.6.0
[bac558e1] OrderedCollections v1.8.1
[1dea7af3] OrdinaryDiffEq v6.103.0
[89bda076] OrdinaryDiffEqAdamsBashforthMoulton v1.5.0
[6ad6398a] OrdinaryDiffEqBDF v1.10.1
[bbf590c4] OrdinaryDiffEqCore v1.36.0
[50262376] OrdinaryDiffEqDefault v1.8.0
[4302a76b] OrdinaryDiffEqDifferentiation v1.16.1
[9286f039] OrdinaryDiffEqExplicitRK v1.4.0
[e0540318] OrdinaryDiffEqExponentialRK v1.8.0
[becaefa8] OrdinaryDiffEqExtrapolation v1.9.0
[5960d6e9] OrdinaryDiffEqFIRK v1.16.0
[101fe9f7] OrdinaryDiffEqFeagin v1.4.0
[d3585ca7] OrdinaryDiffEqFunctionMap v1.5.0
[d28bc4f8] OrdinaryDiffEqHighOrderRK v1.5.0
[9f002381] OrdinaryDiffEqIMEXMultistep v1.7.0
[521117fe] OrdinaryDiffEqLinear v1.6.0
[1344f307] OrdinaryDiffEqLowOrderRK v1.6.0
[b0944070] OrdinaryDiffEqLowStorageRK v1.7.0
[127b3ac7] OrdinaryDiffEqNonlinearSolve v1.15.0
[c9986a66] OrdinaryDiffEqNordsieck v1.4.0
[5dd0a6cf] OrdinaryDiffEqPDIRK v1.6.0
[5b33eab2] OrdinaryDiffEqPRK v1.4.0
[04162be5] OrdinaryDiffEqQPRK v1.4.0
[af6ede74] OrdinaryDiffEqRKN v1.5.0
[43230ef6] OrdinaryDiffEqRosenbrock v1.18.1
[2d112036] OrdinaryDiffEqSDIRK v1.7.0
[669c94d9] OrdinaryDiffEqSSPRK v1.7.0
[e3e12d00] OrdinaryDiffEqStabilizedIRK v1.6.0
[358294b1] OrdinaryDiffEqStabilizedRK v1.4.0
[fa646aed] OrdinaryDiffEqSymplecticRK v1.7.0
[b1df2697] OrdinaryDiffEqTsit5 v1.5.0
[79d7bb75] OrdinaryDiffEqVerner v1.6.0
[d96e819e] Parameters v0.12.3
[f517fe37] Polyester v0.7.18
[1d0040c9] PolyesterWeave v0.2.2
[2dfb63ee] PooledArrays v1.4.3
[d236fae5] PreallocationTools v0.4.34
⌅ [aea7be01] PrecompileTools v1.2.1
[21216c6a] Preferences v1.5.0
[08abe8d2] PrettyTables v3.1.0
[74087812] Random123 v1.7.1
[e6cf234a] RandomNumbers v1.6.0
[3cdcf5f2] RecipesBase v1.3.4
[731186ca] RecursiveArrayTools v3.39.0
[189a3867] Reexport v1.2.2
[ae029012] Requires v1.3.1
[7e49a35a] RuntimeGeneratedFunctions v0.5.16
[94e857df] SIMDTypes v0.1.0
⌃ [0bca4576] SciMLBase v2.124.0
[19f34311] SciMLJacobianOperators v0.1.11
[a6db7da4] SciMLLogging v1.5.0
[c0aeaf25] SciMLOperators v1.10.0
[431bcebd] SciMLPublic v1.0.0
[53ae85a6] SciMLStructures v1.7.0
[7e506255] ScopedValues v1.5.0
[6c6a2e73] Scratch v1.3.0
[91c51154] SentinelArrays v1.4.8
[efcf1570] Setfield v1.1.2
[05bca326] SimpleDiffEq v1.12.0
[727e6d20] SimpleNonlinearSolve v2.9.0
[ce78b400] SimpleUnPack v1.1.0
[a2af1166] SortingAlgorithms v1.2.2
[0a514795] SparseMatrixColorings v0.4.23
[276daf66] SpecialFunctions v2.6.1
[aedffcd0] Static v1.3.1
[0d7ed370] StaticArrayInterface v1.8.0
[90137ffa] StaticArrays v1.9.15
[1e83bf80] StaticArraysCore v1.4.4
[7792a7ef] StrideArraysCore v0.5.8
[892a3eda] StringManipulation v0.4.1
[2efcf032] SymbolicIndexingInterface v0.3.46
[3783bdb8] TableTraits v1.0.1
[bd369af6] Tables v1.12.1
[8290d209] ThreadingUtilities v0.5.5
[a759f4b9] TimerOutputs v0.5.29
[e689c965] Tracy v0.1.6
[781d530d] TruncatedStacktraces v1.4.0
⌅ [d265eb64] TypedSyntax v1.4.3
[3a884ed6] UnPack v1.0.2
[013be700] UnsafeAtomics v0.3.0
[b8c1c048] WidthLimitedIO v1.0.1
[700de1a5] ZygoteRules v0.2.7
[d1e2174e] CUDA_Compiler_jll v0.3.0+0
[4ee394cb] CUDA_Driver_jll v13.0.2+0
[76a88914] CUDA_Runtime_jll v0.19.2+0
[1d5cc7b8] IntelOpenMP_jll v2025.2.0+0
[9c1d0b0a] JuliaNVTXCallbacks_jll v0.2.1+0
[dad2f222] LLVMExtra_jll v0.0.38+0
[ad6e5548] LibTracyClient_jll v0.9.1+6
[856f044c] MKL_jll v2025.2.0+0
[e98f9f5b] NVTX_jll v3.2.2+0
[efe28fd5] OpenSpecFun_jll v0.5.6+0
[1e29f10c] demumble_jll v1.3.0+0
[1317d2d5] oneTBB_jll v2022.0.0+1
[0dad84c5] ArgTools v1.1.1
[56f22d72] Artifacts
[2a0f44e3] Base64
[ade2ca70] Dates
[8ba89e20] Distributed
[f43a241f] Downloads v1.6.0
[7b1f6079] FileWatching
[9fa8497b] Future
[b77e0a4c] InteractiveUtils
[4af54fe1] LazyArtifacts
[b27032c2] LibCURL v0.6.4
[76f85450] LibGit2
[8f399da3] Libdl
[37e2e46d] LinearAlgebra
[56ddb016] Logging
[d6f4376e] Markdown
[ca575930] NetworkOptions v1.2.0
[44cfe95a] Pkg v1.10.0
[de0858da] Printf
[3fa0cd96] REPL
[9a3f8284] Random
[ea8e919c] SHA v0.7.0
[9e88b42a] Serialization
[6462fe0b] Sockets
[2f01184e] SparseArrays v1.10.0
[10745b16] Statistics v1.10.0
[fa267f1f] TOML v1.0.3
[a4e569a6] Tar v1.10.0
[cf7118a7] UUIDs
[4ec0a83e] Unicode
[e66e0078] CompilerSupportLibraries_jll v1.1.1+0
[deac9b47] LibCURL_jll v8.4.0+0
[e37daf67] LibGit2_jll v1.6.4+0
[29816b5a] LibSSH2_jll v1.11.0+1
[c8ffd9c3] MbedTLS_jll v2.28.2+1
[14a3606d] MozillaCACerts_jll v2023.1.10
[4536629a] OpenBLAS_jll v0.3.23+4
[05823500] OpenLibm_jll v0.8.1+2
[bea87d4a] SuiteSparse_jll v7.2.1+1
[83775a58] Zlib_jll v1.2.13+1
[8e850b90] libblastrampoline_jll v5.11.0+0
[8e850ede] nghttp2_jll v1.52.0+1
[3f19e933] p7zip_jll v17.4.0+2
- Output of
versioninfo()
Julia Version 1.10.8
Commit 4c16ff44be8 (2025-01-22 10:06 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 128 × AMD EPYC 7763 64-Core Processor
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-15.0.7 (ORCJIT, znver3)
Threads: 1 default, 0 interactive, 1 GC (on 128 virtual cores)
Environment:
LD_LIBRARY_PATH = /glade/u/apps/casper/24.12/spack/opt/spack/cuda/12.3.2/gcc/12.4.0/jw45/lib64:/glade/u/apps/casper/24.12/spack/opt/spack/cuda/12.3.2/gcc/12.4.0/jw45/nvvm/lib64:/glade/u/apps/casper/24.12/spack/opt/spack/cuda/12.3.2/gcc/12.4.0/jw45/extras/CUPTI/lib64:/glade/u/apps/casper/24.12/spack/opt/spack/cuda/12.3.2/gcc/12.4.0/jw45/extras/Debugger/lib64:/opt/cray/pe/cti/2.18.3/lib:/opt/cray/pe/pmi/6.1.14/lib:/opt/cray/pe/mpich/8.1.29/ofi/intel/2022.1/lib:/opt/cray/libfabric/1.15.2.0/lib64:/opt/cray/libfabric/1.15.2.0/lib:/glade/u/apps/casper/24.12/spack/opt/spack/intel-oneapi-compilers/2024.2.1/gcc/12.4.0/iq3b/compiler/2024.2/opt/compiler/lib:/glade/u/apps/casper/24.12/spack/opt/spack/intel-oneapi-compilers/2024.2.1/gcc/12.4.0/iq3b/compiler/2024.2/lib:/glade/u/apps/derecho/24.12/spack/opt/spack/hdf5/1.12.3/oneapi/2024.2.1/peu3/lib
JULIA_PATH = /glade/u/home/ctowery/software/julia-1.10.8/bin
JULIA_DEPOT_PATH = /glade/work/ctowery/.julia
Additional context
I can replicate a similar error using my M1 Pro Apple laptop and Metal vs CUDA:
ERROR: GPU compilation of MethodInstance for (::Metal.var"#broadcast_linear#228")(::SubArray{…}, ::Base.Broadcast.Broadcasted{…}) failed
KernelError: passing non-bitstype argument
Argument 2 to your kernel function is of type SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true}, which is not a bitstype:
.parent is of type Vector{Float32} which is not isbits.
Only bitstypes, which are "plain data" types that are immutable
and contain no references to other values, can be used in GPU kernels.
For more information, see the `Base.isbitstype` function.
Stacktrace:
[1] check_invocation(job::GPUCompiler.CompilerJob)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/validation.jl:108
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:87 [inlined]
[3] macro expansion
@ ~/.julia/packages/Tracy/tYwAE/src/tracepoint.jl:163 [inlined]
[4] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:85
[5] compile_unhooked
@ ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:80 [inlined]
[6] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:67
[7] compile
@ ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:55 [inlined]
[8] (::Metal.var"#157#165"{GPUCompiler.CompilerJob{…}})(ctx::LLVM.Context)
@ Metal ~/.julia/packages/Metal/av3If/src/compiler/compilation.jl:173
[9] JuliaContext(f::Metal.var"#157#165"{GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:34
[10] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/driver.jl:25
[11] macro expansion
@ ~/.julia/packages/Metal/av3If/src/compiler/compilation.jl:172 [inlined]
[12] macro expansion
@ ~/.julia/packages/ObjectiveC/UNTzb/src/os.jl:264 [inlined]
[13] compile(job::GPUCompiler.CompilerJob)
@ Metal ~/.julia/packages/Metal/av3If/src/compiler/compilation.jl:170
[14] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/execution.jl:245
[15] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Gp8bZ/src/execution.jl:159
[16] macro expansion
@ ~/.julia/packages/Metal/av3If/src/compiler/execution.jl:189 [inlined]
[17] macro expansion
@ ./lock.jl:267 [inlined]
[18] mtlfunction(f::Metal.var"#broadcast_linear#228", tt::Type{Tuple{…}}; name::Nothing, kwargs::@Kwargs{})
@ Metal ~/.julia/packages/Metal/av3If/src/compiler/execution.jl:184
[19] mtlfunction
@ ~/.julia/packages/Metal/av3If/src/compiler/execution.jl:182 [inlined]
[20] macro expansion
@ ~/.julia/packages/Metal/av3If/src/compiler/execution.jl:85 [inlined]
[21] _copyto!
@ ~/.julia/packages/Metal/av3If/src/broadcast.jl:104 [inlined]
[22] materialize!
@ ~/.julia/packages/Metal/av3If/src/broadcast.jl:48 [inlined]
[23] materialize!
@ ./broadcast.jl:911 [inlined]
[24] copyto!(dest::Vector{Float32}, A::ArrayPartition{Float32, Tuple{MtlArray{…}, MtlArray{…}, MtlArray{…}}})
@ RecursiveArrayTools ~/.julia/packages/RecursiveArrayTools/cqMRn/src/array_partition.jl:193
[25] copyto_axcheck!
@ ./abstractarray.jl:1176 [inlined]
[26] Vector{Float32}(x::ArrayPartition{Float32, Tuple{MtlArray{…}, MtlArray{…}, MtlArray{…}}})
@ Base ./array.jl:673
[27] Array
@ ./boot.jl:498 [inlined]
[28] zeromatrix(A::ArrayPartition{Float32, Tuple{MtlArray{…}, MtlArray{…}, MtlArray{…}}})
@ RecursiveArrayTools ~/.julia/packages/RecursiveArrayTools/cqMRn/src/array_partition.jl:445
[29] build_J_W
@ ~/.julia/packages/OrdinaryDiffEqDifferentiation/TKWRC/src/derivative_utils.jl:845 [inlined]
[30] build_nlsolver(alg::Kvaerno5{…}, nlalg::NLNewton{…}, u::ArrayPartition{…}, uprev::ArrayPartition{…}, p::ArrayPartition{…}, t::Float32, dt::Float32, f::ODEFunction{…}, rate_prototype::ArrayPartition{…}, ::Type{…}, ::Type{…}, ::Type{…}, γ::Float32, c::Float32, α::Int64, ::Val{…})
@ OrdinaryDiffEqNonlinearSolve ~/.julia/packages/OrdinaryDiffEqNonlinearSolve/EABtI/src/utils.jl:213
[31] build_nlsolver
@ ~/.julia/packages/OrdinaryDiffEqNonlinearSolve/EABtI/src/utils.jl:152 [inlined]
[32] build_nlsolver
@ ~/.julia/packages/OrdinaryDiffEqNonlinearSolve/EABtI/src/utils.jl:142 [inlined]
[33] alg_cache(alg::Kvaerno5{…}, u::ArrayPartition{…}, rate_prototype::ArrayPartition{…}, ::Type{…}, ::Type{…}, ::Type{…}, uprev::ArrayPartition{…}, uprev2::ArrayPartition{…}, f::ODEFunction{…}, t::Float32, dt::Float32, reltol::Float32, p::ArrayPartition{…}, calck::Bool, ::Val{…})
@ OrdinaryDiffEqSDIRK ~/.julia/packages/OrdinaryDiffEqSDIRK/Dkxij/src/kencarp_kvaerno_caches.jl:362
[34] __init(prob::ODEProblem{…}, alg::Kvaerno5{…}, timeseries_init::Tuple{}, ts_init::Tuple{}, ks_init::Tuple{}; saveat::Tuple{}, tstops::Tuple{}, d_discontinuities::Tuple{}, save_idxs::Nothing, save_everystep::Bool, save_on::Bool, save_discretes::Bool, save_start::Bool, save_end::Nothing, callback::Nothing, dense::Bool, calck::Bool, dt::Float32, dtmin::Float32, dtmax::Float32, force_dtmin::Bool, adaptive::Bool, gamma::Rational{…}, abstol::Float32, reltol::Float32, qmin::Rational{…}, qmax::Int64, qsteady_min::Int64, qsteady_max::Rational{…}, beta1::Nothing, beta2::Nothing, qoldinit::Rational{…}, controller::Nothing, fullnormalize::Bool, failfactor::Int64, maxiters::Int64, internalnorm::typeof(DiffEqBase.ODE_DEFAULT_NORM), internalopnorm::typeof(LinearAlgebra.opnorm), isoutofdomain::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), unstable_check::typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK), verbose::Bool, timeseries_errors::Bool, dense_errors::Bool, advance_to_tstop::Bool, stop_at_next_tstop::Bool, initialize_save::Bool, progress::Bool, progress_steps::Int64, progress_name::String, progress_message::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), progress_id::Symbol, userdata::Nothing, allow_extrapolation::Bool, initialize_integrator::Bool, alias::ODEAliasSpecifier, initializealg::DiffEqBase.DefaultInit, kwargs::@Kwargs{})
@ OrdinaryDiffEqCore ~/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:410
[35] __init (repeats 2 times)
@ ~/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:11 [inlined]
[36] __solve(::ODEProblem{…}, ::Kvaerno5{…}; kwargs::@Kwargs{…})
@ OrdinaryDiffEqCore ~/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:6
[37] __solve
@ ~/.julia/packages/OrdinaryDiffEqCore/GMkz9/src/solve.jl:1 [inlined]
[38] solve_call(_prob::ODEProblem{…}, args::Kvaerno5{…}; merge_callbacks::Bool, kwargshandle::Nothing, kwargs::@Kwargs{…})
@ DiffEqBase ~/.julia/packages/DiffEqBase/aB45d/src/solve.jl:127
[39] solve_call
@ ~/.julia/packages/DiffEqBase/aB45d/src/solve.jl:84 [inlined]
[40] #solve_up#39
@ ~/.julia/packages/DiffEqBase/aB45d/src/solve.jl:563 [inlined]
[41] solve_up
@ ~/.julia/packages/DiffEqBase/aB45d/src/solve.jl:540 [inlined]
[42] #solve#38
@ ~/.julia/packages/DiffEqBase/aB45d/src/solve.jl:530 [inlined]
[43] top-level scope
@ REPL[13]:1
[44] top-level scope
@ ~/.julia/packages/Metal/av3If/src/initialization.jl:80