Add wrappers for older versions of Julia.
maleadt committed Mar 22, 2019
1 parent d4d6aca commit b1ecb30
Showing 3 changed files with 83 additions and 11 deletions.
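All three files apply the same compatibility pattern, sketched here for orientation (a minimal sketch, not part of the commit; `foo` is a hypothetical device function): recent Julia can reference an external device-runtime function through the `ccall(..., llvmcall, ...)` syntax, while older versions have to spell the call out as literal LLVM IR via `Base.llvmcall`.

```julia
# Minimal sketch of the wrapper pattern used throughout this commit.
# `foo` is hypothetical; the version cutoff mirrors the ones below.
if VERSION >= v"1.2.0-DEV.512"
    # newer Julia: reference the external function directly
    @inline foo(x::Int32) =
        ccall("extern foo", llvmcall, Int32, (Int32,), x)
else
    # older Julia: emit the declaration and call as literal LLVM IR
    @inline foo(x::Int32) =
        Base.llvmcall(
            ("declare i32 @foo(i32)",
             "%rv = call i32 @foo(i32 %0)
              ret i32 %rv"), Int32, Tuple{Int32}, x)
end
```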
9 changes: 6 additions & 3 deletions src/compiler/optim.jl
@@ -58,10 +58,13 @@ function optimize!(job::CompilerJob, mod::LLVM.Module, entry::LLVM.Function)
     add!(pm, ModulePass("LowerPTLS", lower_ptls!))

     # the Julia GC lowering pass also has some clean-up that is required
-    function LLVMAddLateLowerGCFramePass(PM::LLVM.API.LLVMPassManagerRef)
-        LLVM.@apicall(:LLVMExtraAddLateLowerGCFramePass,Cvoid,(LLVM.API.LLVMPassManagerRef,), PM)
+    if VERSION >= v"1.2.0-DEV.520"
+        # TODO: move this to LLVM.jl
+        function LLVMAddLateLowerGCFramePass(PM::LLVM.API.LLVMPassManagerRef)
+            LLVM.@apicall(:LLVMExtraAddLateLowerGCFramePass,Cvoid,(LLVM.API.LLVMPassManagerRef,), PM)
+        end
+        LLVMAddLateLowerGCFramePass(LLVM.ref(pm))
     end
-    LLVMAddLateLowerGCFramePass(LLVM.ref(pm))

     run!(pm, mod)
 end
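For context, this is roughly how the gated pass slots into an LLVM.jl pass pipeline (a sketch assuming LLVM.jl's `ModulePassManager`/`run!` API of that era behaves as used in the diff; `lower_gc_frame!` is hypothetical):

```julia
using LLVM

# Run the extra GC-frame lowering pass over a module, mirroring how
# optimize! registers it above; the pass is only added on Julia 1.2+,
# matching the VERSION gate in the diff.
function lower_gc_frame!(mod::LLVM.Module)
    ModulePassManager() do pm
        if VERSION >= v"1.2.0-DEV.520"
            LLVM.@apicall(:LLVMExtraAddLateLowerGCFramePass, Cvoid,
                          (LLVM.API.LLVMPassManagerRef,), LLVM.ref(pm))
        end
        run!(pm, mod)
    end
end
```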
68 changes: 63 additions & 5 deletions src/device/cuda/libcudadevrt.jl
@@ -9,6 +9,25 @@ import CUDAdrv: CuDim3, CuStream_t
 const cudaError_t = Cint
 const cudaStream_t = CUDAdrv.CuStream_t

+if VERSION >= v"1.2.0-DEV.512"
+    @inline cudaLaunchDevice(buf::Ptr{Cvoid}, stream::CuStream) =
+        ccall("extern cudaLaunchDeviceV2", llvmcall, cudaError_t,
+              (Ptr{Cvoid}, cudaStream_t),
+              buf, stream)
+else
+    import Base.Sys: WORD_SIZE
+    # declare i32 @cudaLaunchDeviceV2(i8*, %struct.CUstream_st*)
+    @eval @inline cudaLaunchDevice(buf::Ptr{Cvoid}, stream::CuStream) =
+        Base.llvmcall(
+            ("declare i32 @cudaLaunchDeviceV2(i8*, i8*)",
+            $"%buf = inttoptr i$WORD_SIZE %0 to i8*
+              %stream = inttoptr i$WORD_SIZE %1 to i8*
+              %rv = call i32 @cudaLaunchDeviceV2(i8* %buf, i8* %stream)
+              ret i32 %rv"), cudaError_t,
+            Tuple{Ptr{Cvoid}, cudaStream_t},
+            buf, Base.unsafe_convert(cudaStream_t, stream))
+end
+
 # device-side counterpart of CUDAdrv.launch
 @inline function launch(f::Ptr{Cvoid}, blocks::CuDim, threads::CuDim,
                         shmem::Int, stream::CuStream,
@@ -17,14 +36,43 @@ const cudaStream_t = CUDAdrv.CuStream_t
     threads = CuDim3(threads)

     buf = parameter_buffer(f, blocks, threads, shmem, args...)
-
-    ccall("extern cudaLaunchDeviceV2", llvmcall, cudaError_t,
-          (Ptr{Cvoid}, cudaStream_t),
-          buf, stream)
+    cudaLaunchDevice(buf, stream)

     return
 end

+if false && VERSION >= v"1.2.0-DEV.512"
+    @inline cudaGetParameterBuffer(f::Ptr{Cvoid}, blocks::CuDim3, threads::CuDim3, shmem::Integer) =
+        ccall("extern cudaGetParameterBufferV2", llvmcall, Ptr{Cvoid},
+              (Ptr{Cvoid}, CuDim3, CuDim3, Cuint),
+              f, blocks, threads, shmem)
+else
+    @inline cudaGetParameterBuffer(f::Ptr{Cvoid}, blocks::CuDim3, threads::CuDim3, shmem::Integer) =
+        cudaGetParameterBuffer(f,
+                               blocks.x, blocks.y, blocks.z,
+                               threads.x, threads.y, threads.z,
+                               convert(Cuint, shmem))
+    # declare i8* @cudaGetParameterBufferV2(i8*, %struct.dim3, %struct.dim3, i32)
+    @eval @inline cudaGetParameterBuffer(f::Ptr{Cvoid},
+                                         blocks_x::Cuint, blocks_y::Cuint, blocks_z::Cuint,
+                                         threads_x::Cuint, threads_y::Cuint, threads_z::Cuint,
+                                         shmem::Cuint) =
+        Base.llvmcall(
+            ("declare i8* @cudaGetParameterBufferV2(i8*, {i32,i32,i32}, {i32,i32,i32}, i32)",
+            $"%f = inttoptr i$WORD_SIZE %0 to i8*
+              %blocks.x = insertvalue { i32, i32, i32 } undef, i32 %1, 0
+              %blocks.y = insertvalue { i32, i32, i32 } %blocks.x, i32 %2, 1
+              %blocks.z = insertvalue { i32, i32, i32 } %blocks.y, i32 %3, 2
+              %threads.x = insertvalue { i32, i32, i32 } undef, i32 %4, 0
+              %threads.y = insertvalue { i32, i32, i32 } %threads.x, i32 %5, 1
+              %threads.z = insertvalue { i32, i32, i32 } %threads.y, i32 %6, 2
+              %rv = call i8* @cudaGetParameterBufferV2(i8* %f, {i32,i32,i32} %blocks.z, {i32,i32,i32} %threads.z, i32 %7)
+              %buf = ptrtoint i8* %rv to i$WORD_SIZE
+              ret i$WORD_SIZE %buf"), Ptr{Cvoid},
+            Tuple{Ptr{Cvoid}, Cuint, Cuint, Cuint, Cuint, Cuint, Cuint, Cuint},
+            f, blocks_x, blocks_y, blocks_z, threads_x, threads_y, threads_z, shmem)
+end
+
 @generated function parameter_buffer(f::Ptr{Cvoid}, blocks::CuDim3, threads::CuDim3,
                                      shmem::Int, args...)
     # allocate a buffer
@@ -55,6 +103,16 @@ end
     return ex
 end

+if VERSION >= v"1.2.0-DEV.512"
+    @inline synchronize() = ccall("extern cudaDeviceSynchronize", llvmcall, Cint, ())
+else
+    @eval @inline synchronize() =
+        Base.llvmcall(
+            ("declare i32 @cudaDeviceSynchronize()",
+             "%rv = call i32 @cudaDeviceSynchronize()
+              ret i32 %rv"), cudaError_t, Tuple{})
+end
+
 """
     synchronize()
@@ -64,4 +122,4 @@ and should not be called from the host.
 `synchronize` acts as a synchronization point for
 child grids in the context of dynamic parallelism.
 """
-@inline synchronize() = ccall("extern cudaDeviceSynchronize", llvmcall, Cint, ())
+synchronize
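The pre-1.2 fallbacks above rely on one subtlety worth noting: `Base.llvmcall` needs literal constant strings, so each definition is built under `@eval` with `$"..."` splicing the host word size into the IR, and pointers cross the boundary as word-sized integers. A standalone illustration of that trick (hypothetical `identity_ptr`, not part of libcudadevrt):

```julia
import Base.Sys: WORD_SIZE

# $-interpolation under @eval bakes WORD_SIZE into the IR string at
# definition time ("i64" on 64-bit systems, "i32" on 32-bit), and the
# Ptr argument and result travel as integers of that width.
@eval @inline identity_ptr(p::Ptr{Cvoid}) =
    Base.llvmcall(
        $"%q = inttoptr i$WORD_SIZE %0 to i8*
          %r = ptrtoint i8* %q to i$WORD_SIZE
          ret i$WORD_SIZE %r", Ptr{Cvoid}, Tuple{Ptr{Cvoid}}, p)
```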
17 changes: 14 additions & 3 deletions src/execution.jl
@@ -446,17 +446,28 @@ No keyword arguments are supported.
 @inline dynamic_cufunction(f::Core.Function, tt::Type=Tuple{}) =
     delayed_cufunction(Val(f), Val(tt))

+# marker function that will get picked up during compilation
+if VERSION >= v"1.2.0-DEV.512"
+    @inline cudanativeCompileKernel(id::Int) =
+        ccall("extern cudanativeCompileKernel", llvmcall, Ptr{Cvoid}, (Int,), id)
+else
+    import Base.Sys: WORD_SIZE
+    @eval @inline cudanativeCompileKernel(id::Int) =
+        Base.llvmcall(
+            ($"declare i$WORD_SIZE @cudanativeCompileKernel(i$WORD_SIZE)",
+             $"%rv = call i$WORD_SIZE @cudanativeCompileKernel(i$WORD_SIZE %0)
+              ret i$WORD_SIZE %rv"), Ptr{Cvoid}, Tuple{Int}, id)
+end
+
 const delayed_cufunctions = Vector{Tuple{Core.Function,Type}}()
 @generated function delayed_cufunction(::Val{f}, ::Val{tt}) where {f,tt}
     global delayed_cufunctions
     push!(delayed_cufunctions, (f,tt))
     id = length(delayed_cufunctions)

     quote
-        # drop a marker which will get picked up during compilation
         # TODO: add an edge to this method instance to support method redefinitions
-        fptr = ccall("extern cudanativeCompileKernel", llvmcall, Ptr{Cvoid}, (Int,), $id)
-
+        fptr = cudanativeCompileKernel($id)
         DeviceKernel{f,tt}(fptr)
     end
 end
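A hedged usage sketch of where these pieces meet (not part of the commit; it assumes `DeviceKernel` objects accept the same `threads`/`blocks` keywords as host-side kernels):

```julia
using CUDAnative

child() = return

# Device-side dynamic parallelism: compile a child kernel through the
# marker above, launch it via cudaLaunchDeviceV2, then synchronize.
function parent()
    kernel = CUDAnative.dynamic_cufunction(child)  # tt defaults to Tuple{}
    kernel(; threads=1, blocks=1)       # assumed kernel-object call syntax
    CUDAnative.synchronize()            # device-side sync for child grids
    return
end

# On the host, parent itself would be launched with: @cuda parent()
```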
