Skip to content

External functions in GPU ODE example #496

@H-Sax

Description

@H-Sax

When writing an ODE function to utilise the DiffEqGPU package external functions can not be inferred in the definition. However when I write the functions within the ODE definition the solve computes on the GPU as example.

Working Example

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics,  QuasiMonteCarlo, GlobalSensitivity, LinearAlgebra

 
function NIK(u, p, t)
    τₑₛ = p[1] 
    τₑₚ = p[2]
    Rmv = p[3]
    Zao = p[4] 
    Rs  = p[5] 
    Csa = p[6] 
    Csv = p[7] 
    Eₘₐₓ =p[8]  
    Eₘᵢₙ =p[9]
    # pressures (more readable names)
# the differential equations
    tᵢ = (t +  1.0f0) % 1.0f0

    DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 +
      (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0f0
    (tᵢ <= τₑₚ) * 0.0f0

    DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ

    Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 +
    (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 +
    (tᵢ <= τₑₚ) * 0.0f0

    E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ

    du1 = (u[6] - u[5]) * E + (u[1] / E) * DE # Left ventricle
    du2 = (u[5] - u[7] ) / Csa #Systemic arteries     
    du3 = (u[7] - u[6]) / Csv # Venous
    du4 = u[6] - u[5] # volume
    du5 = ifelse(-(u[1] - u[2]) < 0.0f0, (du1 - du2)/Zao, 0.0f0) #Valve(Zao, (du1 - du2), u[1] - u[2])  # AV 
    du6 = ifelse(-(u[3] - u[1]) < 0.0f0,(du3 - du1)/Rmv, 0.0f0) #Valve(Rmv, (du3 - du1), u[3] - u[1])  # MV
    du7 = (du2 - du3) / Rs # Systemic flow
    return SVector{7}(du1,du2,du3,du4,du5,du6,du7)
end
##

u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0]

p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0]

tspan = (0f0, 10f0)

prob = ODEProblem{false}(NIK, u0, tspan, p)

x = collect(range(7.0f0, stop = 8.0f0, length = 250))

#Perform GSA
f1 = function (p)
    prob_func(prob, i, repeat) = remake(prob; p = (p[:, i]))
    ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
    sol = solve(
        ensemble_prob, GPUVern7(),  EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2))
    # Now sol[i] is the solution for the ith set of parameters
    out = zeros(2, size(p, 2))
    for i in 1:size(p, 2)
        out[1, i] = mean(sol[i][1, :]) #Mean LVP
        out[2, i] = maximum(sol[i][2, :]) # Max Psa
    end
    out
end

samples = 100000
lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0]
ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0]
sampler = SobolSample()
A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler)

@time sobol_result = gsa(f1, Sobol(), A, B, batch = true)

Example that Errors

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics, Distributions, QuasiMonteCarlo, JLD, GlobalSensitivity, LinearAlgebra

function Valve(R, deltaP, open)
    dq = 0.0f0
    if (-open) < 0.0f0 
        dq =  deltaP/R
    else
        dq = 0.0f0
    end
    return SVector{1}(dq)

end

function ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    τ = 1.0f0
    Eshift = 0.0f0

    tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ)

    Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 +
         (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 +
         (tᵢ <= τₑₚ) * 0.0f0

    E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ

    return SVector{1}(E)
end

function DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    τ = 1.0f0
    Eshift = 0.0f0

    tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ)

    DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 +
          (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0f0
    (tᵢ <= τₑₚ) * 0.0f0
    DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ

    return SVector{1}(DE)
end

 
function NIK(u, p, t)
    τₑₛ = p[1] 
    τₑₚ = p[2]
    Rmv = p[3]
    Zao = p[4] 
    Rs  = p[5] 
    Csa = p[6] 
    Csv = p[7] 
    Eₘₐₓ =p[8]  
    Eₘᵢₙ =p[9]
    # pressures (more readable names)
# the differential equations
    du1 = (u[6] - u[5]) * ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) + u[1] / ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) * DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    du2 = (u[5] - u[7] ) / Csa #Systemic arteries     
    du3 = (u[7] - u[6]) / Csv # Venous
    du4 = u[6] - u[5] # volume
    du5    = Valve(Zao, (du1 - du2), u[1] - u[2])  # AV 
    du6   = Valve(Rmv, (du3 - du1), u[3] - u[1])  # MV
    du7     = (du2 - du3) / Rs # Systemic flow
    return SVector{7}(du1,du2,du3,du4,du5,du6,du7)
end
##

u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0]

p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0]

tspan = (0f0, 10f0)

prob = ODEProblem{false}(NIK, u0, tspan, p)

x = collect(range(7.0f0, stop = 8.0f0, length = 250))

#Perform GSA
f1 = function (p)
    prob_func(prob, i, repeat) = remake(prob; p = p[:, i])
    ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
    sol = solve(
        ensemble_prob, GPUVern7(),  reltol = 1e-5, abstol = 1e-5, EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2))
    # Now sol[i] is the solution for the ith set of parameters
    out = zeros(2, size(p, 2))
    for i in 1:size(p, 2)
        out[1, i] = mean(sol[i][1, :]) #Mean LVP
        out[2, i] = maximum(sol[i][2, :]) # Max Psa
    end
    out
end

samples = 100000
lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0]
ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0]
sampler = SobolSample()
A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler)

@time sobol_result = gsa(f1, Sobol(), A, B, batch = true)

Stacktrace

ERROR: LoadError: InvalidIRError: compiling MethodInstance for DiffEqGPU.gpu_ode_asolve_kernel(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}, SciMLBase.StandardODEProblem}, 1}, ::GPUVern7, ::CuDeviceMatrix{SVector{7, Float32}, 1}, ::CuDeviceMatrix{Float32, 1}, ::Float32, ::CallbackSet{Tuple{}, Tuple{}}, ::Nothing, ::Float32, ::Float32, ::CuDeviceVector{Float32, 1}, ::Val{false}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to julia.new_gc_frame)
Reason: unsupported call to an unknown function (call to julia.push_gc_frame)
Reason: unsupported call to an unknown function (call to julia.get_gc_frame_slot)
Reason: unsupported dynamic function invocation (call to (f::ODEFunction)(args...) @ SciMLBase ~/.julia/packages/SciMLBase/DbQVU/src/scimlfunctions.jl:2290)
Stacktrace:
 [1] step!
   @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/perform_step/gpu_vern7_perform_step.jl:104
 [2] macro expansion
   @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/kernels.jl:97
 [3] gpu_ode_asolve_kernel
   @ ~/.julia/packages/KernelAbstractions/zPAn3/src/macros.jl:95
 [4] gpu_ode_asolve_kernel
   @ ./none:0
Reason: unsupported call to an unknown function (call to julia.pop_gc_frame)
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:445 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:444 [inlined]
  [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:92
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:86 [inlined]
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:134
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:115 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:111
 [10] compile
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:103 [inlined]
 [11] #1116
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:247 [inlined]
 [12] JuliaContext(f::CUDA.var"#1116#1119"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:246
 [15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:128
 [16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:103
 [17] macro expansion
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:367 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(DiffEqGPU.gpu_ode_asolve_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1}, GPUVern7, CuDeviceMatrix{SVector{7, Float32}, 1}, CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Float32, Float32, CuDeviceVector{Float32, 1}, Val{false}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Nothing})
    @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:362
 [20] macro expansion
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:112 [inlined]
 [21] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_ode_asolve_kernel)})(::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing)
    @ CUDA.CUDAKernels ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:119
 [22] Kernel
    @ ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:105 [inlined]
 [23] vectorized_asolve(probs::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, prob::ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, alg::GPUVern7; dt::Float32, saveat::Vector{Float32}, save_everystep::Bool, abstol::Float64, reltol::Float64, debug::Bool, callback::CallbackSet{Tuple{}, Tuple{}}, tstops::Nothing, kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120"})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:232
 [24] vectorized_asolve
    @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:179 [inlined]
 [25] batch_solve_up_kernel(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, probs::Vector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{saveat::Vector{Float32}, unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:267
 [26] batch_solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64, saveat::Vector{Float32}})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:168
 [27] macro expansion
    @ ./timing.jl:395 [inlined]
 [28] __solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::@Kwargs{reltol::Float64, abstol::Float64, saveat::Vector{Float32}})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:55
 [29] __solve
    @ ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:1 [inlined]
 [30] #solve#55
    @ ~/.julia/packages/DiffEqBase/O8cUq/src/solve.jl:1096 [inlined]
 [31] (::var"#1#2")(p::Matrix{Float32})
    @ Main ~/harry/NIK_GPU.jl:81
 [32] gsa(f::var"#1#2", method::Sobol, A::Matrix{Float32}, B::Matrix{Float32}; batch::Bool, Ei_estimator::Symbol, distributed::Val{false}, kwargs::@Kwargs{})
    @ GlobalSensitivity ~/.julia/packages/GlobalSensitivity/pLty1/src/sobol_sensitivity.jl:138
 [33] macro expansion
    @ ./timing.jl:279 [inlined]
 [34] top-level scope
    @ ~/harry/NIK_GPU.jl:269
in expression starting at /net/people/plgrid/plghs1454/harry/NIK_GPU.jl:98

This issue is prompted from a discussion on discorse herehttps://discourse.julialang.org/t/external-functions-in-gpu-ode-example/116719

As far as I understand as long as external functions are GPU compatible they should then be inferred by the ode function, this is motivated by discussions at JuliaCon.

Thanks :)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions