Skip to content
This repository has been archived by the owner on May 27, 2021. It is now read-only.

Commit

Permalink
Introduce a DynamicKernel object.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Mar 20, 2019
1 parent 7267c79 commit 9af960a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 12 deletions.
8 changes: 4 additions & 4 deletions src/device/cuda/libcudadevrt.jl
Expand Up @@ -10,13 +10,13 @@ const cudaError_t = Cint
const cudaStream_t = CUDAdrv.CuStream_t

# device-side counterpart of CUDAdrv.launch
@inline function launch(f::Ptr{Cvoid}, blocks::CuDim, threads::CuDim,
@inline function launch(fptr::Ptr{Cvoid}, blocks::CuDim, threads::CuDim,
shmem::Int, stream::CuStream,
args...)
blocks = CuDim3(blocks)
threads = CuDim3(threads)

buf = parameter_buffer(f, blocks, threads, shmem, args...)
buf = parameter_buffer(fptr, blocks, threads, shmem, args...)

ccall("extern cudaLaunchDeviceV2", llvmcall, cudaError_t,
(Ptr{Cvoid}, cudaStream_t),
Expand All @@ -25,13 +25,13 @@ const cudaStream_t = CUDAdrv.CuStream_t
return
end

@generated function parameter_buffer(f::Ptr{Cvoid}, blocks::CuDim3, threads::CuDim3,
@generated function parameter_buffer(fptr::Ptr{Cvoid}, blocks::CuDim3, threads::CuDim3,
shmem::Int, args...)
# allocate a buffer
ex = quote
buf = ccall("extern cudaGetParameterBufferV2", llvmcall, Ptr{Cvoid},
(Ptr{Cvoid}, CuDim3, CuDim3, Cuint),
f, blocks, threads, shmem)
fptr, blocks, threads, shmem)
end

# store the parameters
Expand Down
23 changes: 15 additions & 8 deletions src/execution.jl
Expand Up @@ -219,10 +219,10 @@ macro cuda(ex...)
# dynamic, device-side kernel launch
push!(code.args,
quote
# we're in kernel land already, so no need to convert arguments
# we're in kernel land already, so no need to cudaconvert arguments
local kernel_tt = Tuple{$((:(Core.Typeof($var)) for var in var_exprs)...)}
local kernel = dynamic_cufunction($(esc(f)), kernel_tt)
dynamic_cudacall(kernel, kernel_tt, $(var_exprs...); $(map(esc, call_kwargs)...))
kernel($(var_exprs...); $(map(esc, call_kwargs)...))
end)
else
# regular, host-side kernel launch
Expand Down Expand Up @@ -363,12 +363,19 @@ Kernel

## dynamic parallelism

# Return the `Ptr{Cvoid}` produced by calling the external `cudanativeCompileKernel`
# symbol through `llvmcall`. The function `f` and the argument tuple type `tt`
# (defaulting to `Tuple{}`) are both passed with an `Any` signature.
dynamic_cufunction(f::Core.Function, tt::Type=Tuple{}) =
ccall("extern cudanativeCompileKernel", llvmcall, Ptr{Cvoid}, (Any, Any), f, tt)
# Handle pairing a raw `Ptr{Cvoid}` with two type parameters, `F` and `TT`.
# `dynamic_cufunction` constructs it as `DynamicKernel{f,tt}(fptr)`, so `F` carries the
# kernel function and `TT` its argument tuple type; only the pointer is stored as a field.
struct DynamicKernel{F,TT}
ptr::Ptr{Cvoid}
end

# Build the `DynamicKernel` handle for `f` invoked with argument tuple type `tt`
# (defaulting to `Tuple{}`). The function and the tuple type end up as the type
# parameters of the returned object; the pointer becomes its only field.
function dynamic_cufunction(f::Core.Function, tt::Type=Tuple{})
    # compiling is not possible at this point: emit a marker call to
    # `cudanativeCompileKernel` instead, to be picked up during compilation
    marker = ccall("extern cudanativeCompileKernel", llvmcall, Ptr{Cvoid}, (Any, Any),
                   f, tt)
    return DynamicKernel{f,tt}(marker)
end

@generated function dynamic_cudacall(f::Ptr{Cvoid}, tt::Type, args...;
blocks::CuDim=1, threads::CuDim=1, shmem::Integer=0,
stream::CuStream=CuDefaultStream())
@generated function (kernel::DynamicKernel{F,TT})(args...; blocks::CuDim=1,
threads::CuDim=1,shmem::Integer=0,
stream::CuStream=CuDefaultStream()) where {F,TT}
ex = quote
Base.@_inline_meta
end
Expand All @@ -386,7 +393,7 @@ dynamic_cufunction(f::Core.Function, tt::Type=Tuple{}) =

append!(ex.args, (quote
#GC.@preserve $(converted_args...) begin
launch(f, blocks, threads, shmem, stream, ($(arg_ptrs...),))
launch(kernel.ptr, blocks, threads, shmem, stream, ($(arg_ptrs...),))
#end
end).args)

Expand Down

0 comments on commit 9af960a

Please sign in to comment.