Skip to content

Commit

Permalink
Implement :func ABI for calling GPUCompiler emitted code
Browse files Browse the repository at this point in the history
This ABI is always ccallable, unlike the `:specfunc` ABI which
frequently is not. In the future, it would be good to selectively
re-enable the `:specfunc` ABI for types that are safely ccallable.

The `:specfunc` ABI is interesting to us not just because it's fast:
it also frequently lets us avoid allocations when passing arguments and
return values, unlike the `:func` ABI, which (almost) always allocates.
  • Loading branch information
topolarity committed Feb 1, 2024
1 parent d166c90 commit 7ed4ef8
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/AllocCheck.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ function check_allocs(@nospecialize(func), @nospecialize(types); ignore_throw=tr
end
source = GPUCompiler.methodinstance(Base._stable_typeof(func), Base.to_tuple_type(types))
target = DefaultCompilerTarget()
job = CompilerJob(source, config)
job = CompilerJob(source, specsig_config)
allocs = JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=false, cleanup=false)
optimize!(job, mod)
Expand Down
42 changes: 29 additions & 13 deletions src/compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ else
end
end

struct CompileResult{Success, F, TT, RT}
struct CompileResult{Success, F, TT, RT, ABI}
f_ptr::Ptr{Cvoid}
arg_types::Type{TT}
return_type::Type{RT}
Expand All @@ -66,7 +66,9 @@ const tm = Ref{TargetMachine}() # for opt pipeline
const _kernel_instances = Dict{Any, Any}()
const compiler_cache = Dict{Any, CompileResult}()
# Configuration for the code that is actually called. It uses the `:func` entry ABI,
# which is always ccallable.
const config = CompilerConfig(DefaultCompilerTarget(), NativeParams();
    kernel=false, entry_abi = :func, always_inline=false)
# Configuration using the `:specfunc` entry ABI. It is used for the compilation whose
# only purpose is allocation analysis.
const specsig_config = CompilerConfig(DefaultCompilerTarget(), NativeParams();
    kernel=false, entry_abi = :specfunc, always_inline=false)

const NativeCompilerJob = CompilerJob{NativeCompilerTarget,NativeParams}
GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = true
Expand Down Expand Up @@ -101,23 +103,28 @@ automatically and checked for allocations whenever the function changes or when
types or keyword arguments are provided.
"""
function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
# cuda = active_state()

Base.@lock codegen_lock begin

# compile the function
cache = compiler_cache
source = GPUCompiler.methodinstance(F, tt)
rt = Core.Compiler.return_type(f, tt)

specsig = false # TODO: selectively re-enable after reviewing `uses_specsig` in codegen.cpp
# Compile `job` to LLVM IR and collect the allocation analysis for its source.
#
# The source is compiled twice inside a single `JuliaContext`:
#   1. with `specsig_config`, solely so that `find_allocs!` can analyze the result;
#   2. with `job` itself, producing the module that is handed on for linking.
#
# Returns `(mod, meta, analysis)`, where `mod`/`meta` come from the second compilation
# and `analysis` from the first. `ignore_throw` is captured from the enclosing
# `compile_callable` call.
function compile(@nospecialize(job::CompilerJob))
    return JuliaContext() do ctx
        # First, compile once just to analyze allocations
        specsig_job = GPUCompiler.CompilerJob(job.source, specsig_config, job.world)
        ss_mod, ss_meta = GPUCompiler.compile(:llvm, specsig_job, validate=false)
        optimize!(specsig_job, ss_mod)
        analysis = find_allocs!(ss_mod, ss_meta; ignore_throw)

        # Second, compile again (sigh) with the correct ABI for calling from a ccall
        mod, meta = GPUCompiler.compile(:llvm, job, validate=false)
        optimize!(job, mod)

        # Hand back the ccallable module as compiled; `analysis` must remain the
        # result computed on the specsig module above, so it is not recomputed here.
        return mod, meta, analysis
    end
end
function link(@nospecialize(job::CompilerJob), (mod, meta, analysis))
Expand All @@ -136,13 +143,15 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
"Failed to compile @check_allocs function"))
end
if length(analysis) == 0
CompileResult{true, typeof(f), tt, rt}(f_ptr, tt, rt, f, analysis)
CompileResult{true, typeof(f), tt, rt, specsig}(f_ptr, tt, rt, f, analysis)
else
CompileResult{false, typeof(f), tt, rt}(f_ptr, tt, rt, f, analysis)
CompileResult{false, typeof(f), tt, rt, specsig}(f_ptr, tt, rt, f, analysis)
end
end
end
fun = GPUCompiler.cached_compilation(cache, source, config, compile, link)

config′ = specsig ? specsig_config : config
fun = GPUCompiler.cached_compilation(cache, source, config′, compile, link)

# create a callable object that captures the function instance. we don't need to think
# about world age here, as GPUCompiler already does and will return a different object
Expand All @@ -151,9 +160,16 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
end
end

function (f::CompileResult{Success, F, TT, RT})(args...) where {Success, F, TT, RT}
function (f::CompileResult{Success, F, TT, RT, ABI})(args...) where {Success, F, TT, RT, ABI}
if Success
return abi_call(f.f_ptr, RT, TT, f.func, args...)
if ABI
return abi_call(f.f_ptr, RT, TT, f.func, args...)
else
argsv = Any[args...]
GC.@preserve argsv begin
return ccall(f.f_ptr, Any, (Any, Ptr{Any}, UInt32), f.func, pointer(argsv), length(args))
end
end
else
error("@check_allocs function contains ", length(f.analysis), " allocations.")
end
Expand Down
26 changes: 22 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,26 @@ Documentation for `issue64`.
v[i], v[j] = v[j], v[i]
v
end
let io = IOBuffer()
print(io, @doc issue64)
s = String(take!(io))
@test occursin("Documentation for `issue64`.", s)

# Fixture for issue #70: returns either the input tuple or `nothing`, chosen at random
# on every call.
@check_allocs function foo_with_union_rt(t::Tuple{Float64, Float64})
    return rand((1, -1)) == 1 ? t : nothing
end

@testset "issues" begin
    # issue #64: the text produced by `@doc issue64` must contain the docstring
    let rendered = sprint(print, @doc issue64)
        @test occursin("Documentation for `issue64`.", rendered)
    end

    # issue #70: either outcome of the randomly chosen branch must come back intact.
    # NOTE(review): the call is made twice — presumably to raise the odds of
    # exercising both branches; confirm the intent.
    for _ in 1:2
        result = foo_with_union_rt((1.0, 1.5))
        @test result === nothing || result === (1.0, 1.5)
    end
end

0 comments on commit 7ed4ef8

Please sign in to comment.