Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minimal CUDA version for Lux Dense Layer #1442

Open
avik-pal opened this issue May 13, 2024 · 7 comments
Open

Minimal CUDA version for Lux Dense Layer #1442

avik-pal opened this issue May 13, 2024 · 7 comments

Comments

@avik-pal
Copy link
Contributor

avik-pal commented May 13, 2024

using CUDA, Enzyme

# Inputs for the reproducer: Float32 arrays generated on the host and passed through
# `cu` (from CUDA) — presumably so that they live on the GPU.
# `w` is a 10×10 weight matrix, `b` a length-10 bias, `x` a 10×10 input.
w = cu(rand(Float32, 10, 10))
b = cu(rand(Float32, 10))
x = cu(rand(Float32, 10, 10))
# Element-wise activation: clamps negative values to zero (ReLU).
act = x -> max(0f0, x)

"""
    loss_function(act, w, x, b)

Return the sum of squares of `act` applied element-wise to `muladd(w, x, b)`.

# Arguments
- `act`: scalar function broadcast over the result of `muladd(w, x, b)`.
- `w`, `x`, `b`: operands forwarded unchanged to `muladd`.
"""
function loss_function(act, w, x, b)
    # Alternative form kept from the report (not used here):
    #     sum(abs2, fused_dense_bias_activation(act, w, x, b))
    preactivation = muladd(w, x, b)
    activated = act.(preactivation)
    return sum(abs2, activated)
end

begin
    # Shadow buffers: zero-filled arrays created from `w`, `x` and `b` via `zero`.
    dw = zero(w)
    dx = zero(x)
    db = zero(b)
    
    # Reverse-mode differentiation of `loss_function` with an `Active` return annotation.
    # `act` is wrapped in `Const`; each array is paired with its shadow via `Duplicated`.
    # This is the call that the report says fails with the LLVM verification error.
    Enzyme.autodiff(Reverse, loss_function, Active, Const(act), Duplicated(w, dw), Duplicated(x, dx), Duplicated(b, db))
end

Fails with

ERROR: LLVM error: function failed verification (4)
Stacktrace:
  [1] handle_error(reason::Cstring)
    @ LLVM ~/.julia/packages/LLVM/bzSzE/src/core/context.jl:168
  [2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{…}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{…}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{…}, augmented::Ptr{…}, atomicAdd::Bool)
    @ Enzyme.API ~/.julia/packages/Enzyme/2FwRI/src/api.jl:154
  [3] enzyme!(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::NTuple{…}, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{…}, boxedArgs::Set{…})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:3177
  [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{…}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:5070
  [5] codegen
    @ ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:4477 [inlined]
  [6] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:5755
  [7] _thunk
    @ ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:5755 [inlined]
  [8] cached_compilation
    @ ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:5793 [inlined]
  [9] (::Enzyme.Compiler.var"#554#555"{})(ctx::LLVM.Context)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:5859
 [10] JuliaContext(f::Enzyme.Compiler.var"#554#555"{}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
 [11] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
 [12] #s2027#553
    @ ~/.julia/packages/Enzyme/2FwRI/src/compiler.jl:5811 [inlined]
 [13] 
    @ Enzyme.Compiler ./none:0
 [14] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [15] autodiff
    @ ~/.julia/packages/Enzyme/2FwRI/src/Enzyme.jl:286 [inlined]
 [16] autodiff(::ReverseMode{…}, ::typeof(loss_function), ::Type{…}, ::Const{…}, ::Duplicated{…}, ::Duplicated{…}, ::Duplicated{…})
    @ Enzyme ~/.julia/packages/Enzyme/2FwRI/src/Enzyme.jl:303
 [17] top-level scope
    @ /mnt/research/ongoing/lux/enzyme_cuda.jl:18
Some type information was truncated. Use `show(err)` to see complete types

and a very long LLVM dump.

Details

JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
┌ Warning: The Pkg REPL mode is intended for interactive use only, and should not be used from scripts. It is recommended to use the functional API instead.
└ @ Pkg.REPLMode ~/.julia/juliaup/julia-1.10.3+0.x64.linux.gnu/share/julia/stdlib/v1.10/Pkg/src/REPLMode/REPLMode.jl:382
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] loss_function
│ @ /mnt/research/ongoing/lux/enzyme_cuda.jl:9
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 138527901362528 to {} addrspace(10)**) unordered, align 8, !dbg !439, !tbaa !440, !alias.scope !431, !noalias !434
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
ERROR: LoadError: LLVM error: Failed to materialize symbols: { (JuliaExternal, { libname_cuCtxSetCurrent_40759, libname_cuMemHostAlloc_38650, libname_cuDeviceCanAccessPeer_41778, libname_cuCtxSetCurrent_40231, libname_cuStreamSynchronize_40575, libname_cuCtxSetCurrent_41988, libname_cuStreamGetCaptureInfo_41984, libname_cuCtxGetId_39658, libname_cublasGetProperty_39361, libname_cuMemPoolCreate_40279, libname_cuCtxPopCurrent_v2_40732, libname_cuCtxSetCurrent_41463, libname_cuMemAllocFromPoolAsync_38536, libname_cuMemPoolCreate_41596, libname_cuStreamQuery_41973, libname_cuCtxGetId_37318, libname_cuCtxPushCurrent_v2_40729, libname_cuMemPoolTrimTo_39311, libname_cuMemPoolSetAttribute_40353, libname_cublasSsymm_v2_64_39307, libname_cuDeviceGetAttribute_40201, libname_cuMemPoolCreate_40364, libname_cudaRuntimeGetVersion_39775, libname_cuCtxGetCurrent_40830, libname_cuMemAlloc_v2_38538, libname_cuMemsetD32Async_39068, libname_cuMemsetD32Async_39556, libname_cuCtxSetCurrent_40756, libname_cuMemPoolTrimTo_38488, libname_cuDeviceGet_41449, libname_cuStreamQuery_40740, libname_cuStreamGetCaptureInfo_40752, libname_cuStreamSynchronize_37225, libname_cuDeviceGetName_41015, libname_cuCtxSynchronize_38500, libname_cuCtxGetDevice_41967, libname_cuStreamCreate_41741, libname_cuMemGetInfo_v2_38102, libname_cuMemPoolGetAttribute_39590, libname_cuCtxGetCurrent_37272, libname_cuCtxGetApiVersion_41478, libname_cuCtxGetId_41476, libname_cuDeviceSetMemPool_38018, libname_cuLaunchKernel_38608, libname_cuOccupancyMaxPotentialBlockSize_38792, libname_cublasSgemm_v2_64_39435, libname_cuStreamSynchronize_37220, libname_cuMemGetInfo_v2_40442, libname_cuStreamSynchronize_41807, libname_cuDeviceGet_37877, libname_cuMemAlloc_v2_38530, libname_cuMemcpyDtoHAsync_v2_38437, libname_cuDevicePrimaryCtxRetain_37310, libname_cuDeviceCanAccessPeer_38375, libname_cuCtxSynchronize_38494, libname_cublasGetProperty_39352, libname_cuCtxGetDevice_38330, libname_cuPointerGetAttribute_38405, 
libname_cuStreamCreate_38169, libname_cuStreamCreate_40509, libname_cuStreamSynchronize_40580, libname_cuCtxSetCurrent_41991, libname_cuMemPoolTrimTo_39526, libname_cuMemPoolCreate_38024, libname_cuDeviceGet_40217, libname_cuDeviceGetAttribute_41433, libname_cuCtxSetCurrent_38409, libname_cuCtxGetCurrent_40285, libname_cuMemPoolTrimTo_39511, libname_cuMemPoolTrimTo_39395, libname_cuStreamQuery_41972, libname_cuCtxGetCurrent_37945, libname_cublasSscal_v2_39494, libname_cuMemPoolTrimTo_39462, libname_cuDeviceGetName_37443, libname_cuDeviceGetCount_39613, libname_cublasSgemm_v2_39442, libname_cuCtxSetCurrent_37891, libname_cuMemAllocAsync_38528, libname_cuMemPoolSetAttribute_38013, libname_cuCtxPushCurrent_v2_41961, libname_cuMemcpyDtoDAsync_v2_38454, libname_cuMemcpyHtoDAsync_v2_38348, libname_cuDeviceGetCount_37287, libname_cublasSetStream_v2_39406, libname_cuMemsetD32Async_40788, libname_cuStreamQuery_38336, libname_cuMemGetInfo_v2_41674, libname_cuMemPoolSetAccess_41767, libname_cuStreamSynchronize_41812, libname_cuMemPoolTrimTo_39365, libname_cuCtxGetDevice_40735, libname_cuCtxSetCurrent_38412, libname_cuStreamGetCaptureInfo_38400, libname_cuDeviceGetAttribute_37861, libname_cuStreamQuery_38335, libname_cuDeviceSetMemPool_40358, libname_cuMemPoolTrimTo_39480, libname_cuMemPoolSetAccess_38364, libname_cuCtxGetId_40890, libname_cublasSscal_v2_64_39523, libname_cudaRuntimeGetVersion_41007, libname_cuCtxGetId_37904, libname_cuMemPoolGetAttribute_37264, libname_cuDeviceGetCount_40845, libname_cublasSetMathMode_39329, libname_cuCtxGetCurrent_41517, libname_cuDeviceGetName_39783, libname_cuDevicePrimaryCtxRetain_40882, libname_cublasSsymm_v2_39283, libname_cublasSgemm_v2_39457, libname_cublasSscal_v2_39507, diffejulia_loss_function_37136wrap, libname_cublasSsymm_v2_64_39261, libname_cuMemPoolCreate_37939, libname_cuCtxPopCurrent_v2_38327, libname_cuMemPoolSetAccess_40535, libname_cuMemPoolCreate_41511, libname_cudaRuntimeGetVersion_37435, 
libname_cublasSscal_v2_64_39490, libname_cuMemPoolGetAttribute_40822, libname_cuMemPoolSetAttribute_41585, libname_cuMemPoolTrimTo_39288, libname_cuStreamQuery_40741, libname_cublasSetStream_v2_39392, libname_cuCtxGetApiVersion_40246, libname_cublasSetStream_v2_39382, libname_cuCtxSetCurrent_40832, libname_cuCtxGetId_40244, libname_cuCtxSetCurrent_39600, libname_cuDeviceCanAccessPeer_40546, libname_cuMemcpyPeerAsync_38458, libname_cuCtxPopCurrent_v2_41964, libname_cuCtxGetApiVersion_37906, libname_cublasSetMathMode_39340, libname_cuCtxGetCurrent_39598, libname_cublasSsymm_v2_39268, libname_cublasSgemm_v2_64_39476, libname_cuCtxSetCurrent_37274, libname_cuLaunchCooperativeKernel_38605, libname_cuDevicePrimaryCtxRetain_39650, libname_cuDeviceSetMemPool_41590, libname_cuMemPoolTrimTo_39343, libname_cuCtxPushCurrent_v2_38324 }) }
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/LLVM/bzSzE/src/executionengine/utils.jl:32 [inlined]
[2] lookup
@ ~/.julia/packages/LLVM/bzSzE/src/orc.jl:434 [inlined]
[3] lookup
@ ~/.julia/packages/LLVM/bzSzE/src/orc.jl:433 [inlined]
[4] lookup
@ ~/.julia/packages/Enzyme/NVk8T/src/compiler/orcv2.jl:255 [inlined]
[5] _link(job::GPUCompiler.CompilerJob{…}, ::Tuple{…})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5751
[6] cached_compilation
@ ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5811 [inlined]
[7] (::Enzyme.Compiler.var"#562#563"{…})(ctx::LLVM.Context)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5876
[8] JuliaContext(f::Enzyme.Compiler.var"#562#563"{…}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
[9] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
[10] #s2025#561
@ ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5828 [inlined]
[11]
@ Enzyme.Compiler ./none:0
[12] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[13] autodiff
@ ~/.julia/packages/Enzyme/NVk8T/src/Enzyme.jl:286 [inlined]
[14] autodiff(::ReverseMode{…}, ::typeof(loss_function), ::Type{…}, ::Const{…}, ::Duplicated{…}, ::Duplicated{…}, ::Duplicated{…})
@ Enzyme ~/.julia/packages/Enzyme/NVk8T/src/Enzyme.jl:303
[15] top-level scope
@ /mnt/research/ongoing/lux/enzyme_cuda.jl:17
Some type information was truncated. Use `show(err)` to see complete types.
in expression starting at /mnt/research/ongoing/lux/enzyme_cuda.jl:12

@avik-pal
Copy link
Contributor Author

avik-pal commented May 13, 2024

Seems like it could stem from

"""
    sum_loss(x)

Return the sum of `abs2` applied to every element of `x`.
"""
sum_loss(x) = sum(abs2, x)

# Reproducer: reverse-mode Enzyme differentiation of `sum_loss` on an array created with `cu`.
begin
    x = cu(rand(Float32, 10, 10))  # random 10×10 Float32 matrix passed through `cu`
    dx = zero(x)  # zero-filled array of the same shape, paired with `x` in `Duplicated`
    # Reverse-mode call with an `Active` return annotation; this is the call that errors below.
    Enzyme.autodiff(Reverse, sum_loss, Active, Duplicated(x, dx))
end

which is documented as non-functional, but how can we bypass it? We could write it in pullback form, which avoids the reduction, and that might work.

Details

JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
JIT session error: Symbols not found: [ cuMemsetD8 ]
┌ Warning: The Pkg REPL mode is intended for interactive use only, and should not be used from scripts. It is recommended to use the functional API instead.
└ @ Pkg.REPLMode ~/.julia/juliaup/julia-1.10.3+0.x64.linux.gnu/share/julia/stdlib/v1.10/Pkg/src/REPLMode/REPLMode.jl:382
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10), {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
┌ Error: Found null pointer at

│ Stacktrace:
│ [1] get_repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:63
│ [2] repl_frontend_task
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:56
│ [3] default_scalar_indexing
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:75
│ [4] assertscalar
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:105
│ [5] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:48
│ [6] scalar_getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:34
│ [7] _getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:17
│ [8] getindex
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/indexing.jl:15
│ [9] macro expansion
│ @ ~/.julia/packages/GPUArraysCore/GMsgk/src/GPUArraysCore.jl:210
│ [10] #_mapreduce#42
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:71
│ [11] _mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:33
│ [12] mapreduce
│ @ ~/.julia/packages/GPUArrays/OKkAu/src/host/mapreduce.jl:28
│ [13] _sum
│ @ ./reducedim.jl:1015
│ [14] sum
│ @ ./reducedim.jl:1011
│ [15] sum_loss
│ @ /mnt/research/ongoing/lux/enzyme_cuda1.jl:4
│ arg = %active_repl.checked = load atomic {} addrspace(10)*, {} addrspace(10)** inttoptr (i64 134824559520096 to {} addrspace(10)**) unordered, align 8, !dbg !367, !tbaa !368, !alias.scope !359, !noalias !362
└ @ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/absint.jl:121
ERROR: LoadError: LLVM error: Failed to materialize symbols: { (JuliaExternal, { libname_cuCtxSetCurrent_34777, libname_cuMemAlloc_v2_32950, libname_cuCtxGetCurrent_33543, libname_cuMemPoolCreate_35456, libname_cuStreamSynchronize_34520, libname_cuMemPoolSetAccess_33207, libname_cuMemPoolCreate_34309, libname_cuMemPoolGetAttribute_31973, libname_cuCtxGetCurrent_34230, libname_cuDeviceGetName_34960, libname_cuCtxSetCurrent_35408, libname_cuStreamQuery_34686, libname_cuCtxSetCurrent_34704, libname_cuMemGetInfo_v2_35619, libname_cuMemsetD32Async_34733, libname_cuStreamSynchronize_32980, libname_cuDeviceGet_32725, libname_cuCtxGetId_33603, libname_cuStreamQuery_35918, libname_cuStreamCreate_34454, diffejulia_sum_loss_31707wrap, libname_cuStreamGetCaptureInfo_35929, libname_cuCtxPushCurrent_v2_35906, libname_cuMemPoolSetAccess_35712, libname_cuCtxSynchronize_32789, libname_cuCtxGetId_35421, libname_cuMemPoolSetAttribute_34298, libname_cudaRuntimeGetVersion_32449, libname_cuCtxGetId_34189, libname_cuCtxGetDevice_32921, libname_cuCtxPushCurrent_v2_34674, libname_cuStreamQuery_32927, libname_cuCtxSetCurrent_34176, libname_cuCtxGetCurrent_31981, libname_cuMemPoolCreate_35541, libname_cuCtxPopCurrent_v2_32918, libname_cuCtxSetCurrent_33401, libname_cuCtxPopCurrent_v2_35909, libname_cuDeviceSetMemPool_34303, libname_cuDeviceGetAttribute_34146, libname_cuMemAllocFromPoolAsync_32956, libname_cuCtxPopCurrent_v2_34677, libname_cuCtxGetDevice_35912, libname_cuStreamSynchronize_35752, libname_cuDevicePrimaryCtxRetain_32339, libname_cuCtxSetCurrent_31983, libname_cudaRuntimeGetVersion_33720, libname_cuMemsetD32Async_33456, libname_cuStreamSynchronize_34525, libname_cuPointerGetAttribute_33397, libname_cuCtxGetId_34835, libname_cuDeviceCanAccessPeer_35723, libname_cuDeviceGet_34162, libname_cuMemGetInfo_v2_32215, libname_cuCtxSetCurrent_35936, libname_cuDeviceGetAttribute_35378, libname_cuDeviceSetMemPool_32108, libname_cuCtxSetCurrent_32739, libname_cuMemAllocAsync_32948, 
libname_cuMemPoolCreate_32016, libname_cuDeviceGetCount_34790, libname_cuCtxPushCurrent_v2_32915, libname_cuCtxGetApiVersion_34191, libname_cuDeviceGet_35394, libname_cuDeviceCanAccessPeer_34491, libname_cuMemPoolCreate_32114, libname_cuMemPoolSetAttribute_32103, libname_cuStreamCreate_35686, libname_cuMemcpyDtoHAsync_v2_33391, libname_cuCtxSetCurrent_34701, libname_cuMemsetD32Async_33501, libname_cuMemAlloc_v2_32958, libname_cuStreamGetCaptureInfo_33243, libname_cuMemGetInfo_v2_34387, libname_cuCtxGetId_32030, libname_cuMemHostAlloc_33053, libname_cuDeviceGetName_33728, libname_cuCtxSetCurrent_33545, libname_cuDeviceGetCount_33558, libname_cuCtxSynchronize_32783, libname_cuStreamQuery_32926, libname_cuCtxGetId_32752, libname_cuCtxGetCurrent_35462, libname_cuCtxGetCurrent_32022, libname_cuCtxGetDevice_34680, libname_cuCtxGetApiVersion_32754, libname_cudaRuntimeGetVersion_34952, libname_cuMemPoolGetAttribute_33535, libname_cuStreamQuery_35917, libname_cuStreamGetCaptureInfo_34697, libname_cuDeviceGetCount_31939, libname_cuDevicePrimaryCtxRetain_34827, libname_cuMemPoolSetAttribute_35530, libname_cuOccupancyMaxPotentialBlockSize_33265, libname_cuStreamSynchronize_35757, libname_cuStreamSynchronize_32975, libname_cuDeviceSetMemPool_35535, libname_cuMemPoolSetAccess_34480, libname_cuCtxGetCurrent_34775, libname_cuDeviceGetName_32455, libname_cuDeviceCanAccessPeer_33218, libname_cuCtxSetCurrent_35933, libname_cuCtxGetApiVersion_35423, libname_cuCtxSetCurrent_33404, libname_cuMemPoolTrimTo_32777, libname_cuMemPoolCreate_34224, libname_cuDeviceGetAttribute_32277, libname_cuStreamCreate_32305, libname_cuMemPoolGetAttribute_34767, libname_cuDevicePrimaryCtxRetain_33595, libname_cuStreamQuery_34685 }) }
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/LLVM/bzSzE/src/executionengine/utils.jl:32 [inlined]
[2] lookup
@ ~/.julia/packages/LLVM/bzSzE/src/orc.jl:434 [inlined]
[3] lookup
@ ~/.julia/packages/LLVM/bzSzE/src/orc.jl:433 [inlined]
[4] lookup
@ ~/.julia/packages/Enzyme/NVk8T/src/compiler/orcv2.jl:255 [inlined]
[5] _link(job::GPUCompiler.CompilerJob{…}, ::Tuple{…})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5751
[6] cached_compilation
@ ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5811 [inlined]
[7] (::Enzyme.Compiler.var"#562#563"{…})(ctx::LLVM.Context)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5876
[8] JuliaContext(f::Enzyme.Compiler.var"#562#563"{…}; kwargs::@kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
[9] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
[10] #s2025#561
@ ~/.julia/packages/Enzyme/NVk8T/src/compiler.jl:5828 [inlined]
[11]
@ Enzyme.Compiler ./none:0
[12] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[13] autodiff
@ ~/.julia/packages/Enzyme/NVk8T/src/Enzyme.jl:286 [inlined]
[14] autodiff(mode::ReverseMode{false, FFIABI, false}, f::typeof(sum_loss), ::Type{Active}, args::Duplicated{CuArray{…}})
@ Enzyme ~/.julia/packages/Enzyme/NVk8T/src/Enzyme.jl:303
[15] top-level scope
@ /mnt/research/ongoing/lux/enzyme_cuda1.jl:10
Some type information was truncated. Use show(err) to see complete types.
in expression starting at /mnt/research/ongoing/lux/enzyme_cuda1.jl:7

@wsmoses
Copy link
Member

wsmoses commented May 13, 2024

Apparently the differentiation of CUmemPoolProps_st

@wsmoses
Copy link
Member

wsmoses commented May 13, 2024

@avik-pal with all of these issues can you upload the full error log?

@avik-pal
Copy link
Contributor Author

Added both the stacktraces with CUDA, cuDNN and Enzyme master

@wsmoses
Copy link
Member

wsmoses commented May 17, 2024

Oh cool, so with master your last one (#1442 (comment)) now successfully differentiates and hits a GPUCompiler / LLVM.jl / LLJIT-related question for which I may need some assistance/explanation from @vchuravy / @maleadt.

So, quick tl;dr: the original code we AD has, say, a cublasgemm inside it. We take that code via GPUCompiler and also inject a cudaMemset call into it, then send it to our custom JIT infra to compile/run (in 1.10+ that's now the LLJIT stuff @gbaraldi did).

Ignore the nullptr stuff for now.

The JIT now complains it can't find cuMemsetD8 (we specifically don't call that — we call cudaMemset — but presumably something somewhere says it should be called instead, or maybe CUDA.jl itself calls it). Either way, somehow we're not loading the symbols CUDA needs (the ones we added and/or the ones from the original CUDA program).

Any thoughts?

@wsmoses
Copy link
Member

wsmoses commented May 17, 2024

Basically as soon as we fix this we seem to successfully differentiate through cublasgemm (since at least generating the llvm module of the derivative has now succeeded!)

@wsmoses
Copy link
Member

wsmoses commented May 17, 2024

In these cases Enzyme clearly is not the originator of all these symbols:

ERROR: LoadError: LLVM error: Failed to materialize symbols: { (JuliaExternal, { libname_cuCtxSetCurrent_40759, libname_cuMemHostAlloc_38650, libname_cuDeviceCanAccessPeer_41778, libname_cuCtxSetCurrent_40231, libname_cuStreamSynchronize_40575, libname_cuCtxSetCurrent_41988, libname_cuStreamGetCaptureInfo_41984, libname_cuCtxGetId_39658, libname_cublasGetProperty_39361, libname_cuMemPoolCreate_40279, libname_cuCtxPopCurrent_v2_40732, libname_cuCtxSetCurrent_41463, libname_cuMemAllocFromPoolAsync_38536, libname_cuMemPoolCreate_41596, libname_cuStreamQuery_41973, libname_cuCtxGetId_37318, libname_cuCtxPushCurrent_v2_40729, libname_cuMemPoolTrimTo_39311, libname_cuMemPoolSetAttribute_40353, libname_cublasSsymm_v2_64_39307, libname_cuDeviceGetAttribute_40201, libname_cuMemPoolCreate_40364, libname_cudaRuntimeGetVersion_39775, libname_cuCtxGetCurrent_40830, libname_cuMemAlloc_v2_38538, libname_cuMemsetD32Async_39068, libname_cuMemsetD32Async_39556, libname_cuCtxSetCurrent_40756, libname_cuMemPoolTrimTo_38488, libname_cuDeviceGet_41449, libname_cuStreamQuery_40740, libname_cuStreamGetCaptureInfo_40752, libname_cuStreamSynchronize_37225, libname_cuDeviceGetName_41015, libname_cuCtxSynchronize_38500, libname_cuCtxGetDevice_41967, libname_cuStreamCreate_41741, libname_cuMemGetInfo_v2_38102, libname_cuMemPoolGetAttribute_39590, libname_cuCtxGetCurrent_37272, libname_cuCtxGetApiVersion_41478, libname_cuCtxGetId_41476, libname_cuDeviceSetMemPool_38018, libname_cuLaunchKernel_38608, libname_cuOccupancyMaxPotentialBlockSize_38792, libname_cublasSgemm_v2_64_39435, libname_cuStreamSynchronize_37220, libname_cuMemGetInfo_v2_40442, libname_cuStreamSynchronize_41807, libname_cuDeviceGet_37877, libname_cuMemAlloc_v2_38530, libname_cuMemcpyDtoHAsync_v2_38437, libname_cuDevicePrimaryCtxRetain_37310, libname_cuDeviceCanAccessPeer_38375, libname_cuCtxSynchronize_38494, libname_cublasGetProperty_39352, libname_cuCtxGetDevice_38330, libname_cuPointerGetAttribute_38405, 
libname_cuStreamCreate_38169, libname_cuStreamCreate_40509, libname_cuStreamSynchronize_40580, libname_cuCtxSetCurrent_41991, libname_cuMemPoolTrimTo_39526, libname_cuMemPoolCreate_38024, libname_cuDeviceGet_40217, libname_cuDeviceGetAttribute_41433, libname_cuCtxSetCurrent_38409, libname_cuCtxGetCurrent_40285, libname_cuMemPoolTrimTo_39511, libname_cuMemPoolTrimTo_39395, libname_cuStreamQuery_41972, libname_cuCtxGetCurrent_37945, libname_cublasSscal_v2_39494, libname_cuMemPoolTrimTo_39462, libname_cuDeviceGetName_37443, libname_cuDeviceGetCount_39613, libname_cublasSgemm_v2_39442, libname_cuCtxSetCurrent_37891, libname_cuMemAllocAsync_38528, libname_cuMemPoolSetAttribute_38013, libname_cuCtxPushCurrent_v2_41961, libname_cuMemcpyDtoDAsync_v2_38454, libname_cuMemcpyHtoDAsync_v2_38348, libname_cuDeviceGetCount_37287, libname_cublasSetStream_v2_39406, libname_cuMemsetD32Async_40788, libname_cuStreamQuery_38336, libname_cuMemGetInfo_v2_41674, libname_cuMemPoolSetAccess_41767, libname_cuStreamSynchronize_41812, libname_cuMemPoolTrimTo_39365, libname_cuCtxGetDevice_40735, libname_cuCtxSetCurrent_38412, libname_cuStreamGetCaptureInfo_38400, libname_cuDeviceGetAttribute_37861, libname_cuStreamQuery_38335, libname_cuDeviceSetMemPool_40358, libname_cuMemPoolTrimTo_39480, libname_cuMemPoolSetAccess_38364, libname_cuCtxGetId_40890, libname_cublasSscal_v2_64_39523, libname_cudaRuntimeGetVersion_41007, libname_cuCtxGetId_37904, libname_cuMemPoolGetAttribute_37264, libname_cuDeviceGetCount_40845, libname_cublasSetMathMode_39329, libname_cuCtxGetCurrent_41517, libname_cuDeviceGetName_39783, libname_cuDevicePrimaryCtxRetain_40882, libname_cublasSsymm_v2_39283, libname_cublasSgemm_v2_39457, libname_cublasSscal_v2_39507, diffejulia_loss_function_37136wrap, libname_cublasSsymm_v2_64_39261, libname_cuMemPoolCreate_37939, libname_cuCtxPopCurrent_v2_38327, libname_cuMemPoolSetAccess_40535, libname_cuMemPoolCreate_41511, libname_cudaRuntimeGetVersion_37435, 
libname_cublasSscal_v2_64_39490, libname_cuMemPoolGetAttribute_40822, libname_cuMemPoolSetAttribute_41585, libname_cuMemPoolTrimTo_39288, libname_cuStreamQuery_40741, libname_cublasSetStream_v2_39392, libname_cuCtxGetApiVersion_40246, libname_cublasSetStream_v2_39382, libname_cuCtxSetCurrent_40832, libname_cuCtxGetId_40244, libname_cuCtxSetCurrent_39600, libname_cuDeviceCanAccessPeer_40546, libname_cuMemcpyPeerAsync_38458, libname_cuCtxPopCurrent_v2_41964, libname_cuCtxGetApiVersion_37906, libname_cublasSetMathMode_39340, libname_cuCtxGetCurrent_39598, libname_cublasSsymm_v2_39268, libname_cublasSgemm_v2_64_39476, libname_cuCtxSetCurrent_37274, libname_cuLaunchCooperativeKernel_38605, libname_cuDevicePrimaryCtxRetain_39650, libname_cuDeviceSetMemPool_41590, libname_cuMemPoolTrimTo_39343, libname_cuCtxPushCurrent_v2_38324 }) }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants