Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add printf support #163

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
Expand Down
107 changes: 106 additions & 1 deletion src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
module KernelAbstractions

export @kernel
export @Const, @localmem, @private, @uniform, @synchronize, @index, groupsize, @print
export @Const, @localmem, @private, @uniform, @synchronize, @index, groupsize, @print, @printf
export Device, GPU, CPU, CUDADevice, Event, MultiEvent, NoneEvent
export async_copy!


using MacroTools
using Printf
using StaticArrays
using Cassette
using Adapt
Expand All @@ -28,6 +29,7 @@ and then invoked on the arguments.
- [`@uniform`](@ref)
- [`@synchronize`](@ref)
- [`@print`](@ref)
- [`@printf`](@ref)

# Example:

Expand Down Expand Up @@ -236,6 +238,37 @@ macro print(items...)
end
end

"""
    promote_c_argument(arg)

Apply C's default argument promotions to `arg` before it is handed to a
C-style varargs `printf`.

> When a function with a variable-length argument list is called, the variable
> arguments are passed using C's old "default argument promotions." These say
> that types char and short int are automatically promoted to int, and type
> float is automatically promoted to double. Therefore, varargs functions will
> never receive arguments of type char, short int, or float.

`@generated` so the promotion is resolved at compile time from the argument's
*type*: `Cchar`/`Cuchar`/`Cshort`/`Cushort` widen to `Cint`, `Cfloat` widens
to `Cdouble`, and every other type passes through unchanged.
"""
@generated function promote_c_argument(arg)
    # Inside an @generated body, `arg` is the argument's type; compare it
    # against the C type aliases by identity. The C integer promotions cover
    # the unsigned narrow types as well, so Cuchar/Cushort are included.
    if arg === Cchar || arg === Cuchar || arg === Cshort || arg === Cushort
        return :(Cint(arg))
    elseif arg === Cfloat
        return :(Cdouble(arg))
    else
        return :(arg)
    end
end
charleskawczynski marked this conversation as resolved.
Show resolved Hide resolved

"""
    @printf(fmt::String, args...)

C-style formatted printing usable from inside kernels, dispatched per backend.

The format string must be a literal: it is lifted into the type domain as
`Val(Symbol(fmt))` so the backend implementations of `__printf` can be
`@generated` against it. Each argument is routed through
[`promote_c_argument`](@ref) to apply C's default argument promotions
(char/short -> int, float -> double) before the varargs call.

# Platform differences
- `GPU`: the `CUDACtx` overdub rewrites `__printf` to the device-side
  `CUDA._cuprintf`.
- `CPU`: the `CPUCtx` overdub runs `__printf` directly, which formats with
  `Printf.@sprintf` and emits the result via `@print`.
"""
macro printf(fmt::String, args...)
    fmt_val = Val(Symbol(fmt))

    return :(__printf($fmt_val, $(map(arg -> :(promote_c_argument($arg)), esc.(args))...)))
end

"""
@index

Expand Down Expand Up @@ -452,6 +485,78 @@ end
end
end

# Results in "Conversion of boxed type String is not allowed"
# @generated function __printf(::Val{fmt}, argspec...) where {fmt}
# arg_exprs = [:( argspec[$i] ) for i in 1:length(argspec)]
# arg_types = [argspec...]

# T_void = LLVM.VoidType(LLVM.Interop.JuliaContext())
# T_int32 = LLVM.Int32Type(LLVM.Interop.JuliaContext())
# T_pint8 = LLVM.PointerType(LLVM.Int8Type(LLVM.Interop.JuliaContext()))

# # create functions
# param_types = LLVMType[convert.(LLVMType, arg_types)...]
# llvm_f, _ = create_function(T_int32, param_types)
# mod = LLVM.parent(llvm_f)
# sfmt = String(fmt)
# # generate IR
# Builder(LLVM.Interop.JuliaContext()) do builder
# entry = BasicBlock(llvm_f, "entry", LLVM.Interop.JuliaContext())
# position!(builder, entry)

# str = globalstring_ptr!(builder, sfmt)

# # construct and fill args buffer
# if isempty(argspec)
# buffer = LLVM.PointerNull(T_pint8)
# else
# argtypes = LLVM.StructType("printf_args", LLVM.Interop.JuliaContext())
# elements!(argtypes, param_types)

# args = alloca!(builder, argtypes)
# for (i, param) in enumerate(parameters(llvm_f))
# p = struct_gep!(builder, args, i-1)
# store!(builder, param, p)
# end

# buffer = bitcast!(builder, args, T_pint8)
# end

# # invoke vprintf and return
# vprintf_typ = LLVM.FunctionType(T_int32, [T_pint8, T_pint8])
# vprintf = LLVM.Function(mod, "vprintf", vprintf_typ)
# chars = call!(builder, vprintf, [str, buffer])

# ret!(builder, chars)
# end

# arg_tuple = Expr(:tuple, arg_exprs...)
# call_function(llvm_f, Int32, Tuple{arg_types...}, arg_tuple)
# end

# Results in "InvalidIRError: compiling kernel
# gpu_kernel_printf(... Reason: unsupported dynamic
# function invocation"
"""
    __printf(::Val{fmt}, items...)

CPU implementation behind [`@printf`](@ref). The format string travels in the
type domain (`Val{fmt}`) so this function can be `@generated`: the
`Printf.@sprintf` call is expanded at compile time against the literal format
string, and the formatted result is emitted through `@print`.
"""
@generated function __printf(::Val{fmt}, items...) where {fmt}
    # Build the expressions fed to @sprintf. An argument that arrived as a
    # `Val` (a compile-time constant such as a Symbol) is spliced back in as
    # that constant; everything else is read from `items` at run time.
    args = Any[]
    for i in eachindex(items)
        T = items[i]
        push!(args, T <: Val ? QuoteNode(T.parameters[1]) : :(items[$i]))
    end
    sfmt = String(fmt)
    return quote
        @print(@sprintf($sfmt, $(args...)))
    end
end

###
# Backends/Implementation
###
Expand Down
4 changes: 4 additions & 0 deletions src/backends/cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ end
__print(items...)
end

# CPU lowering of `@printf`: inside an overdubbed kernel, a call to
# `__printf` simply runs the host implementation directly.
@inline Cassette.overdub(ctx::CPUCtx, ::typeof(__printf), fmt, xs...) = __printf(fmt, xs...)

generate_overdubs(CPUCtx)

# Don't recurse into these functions
Expand Down
4 changes: 4 additions & 0 deletions src/backends/cuda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@ end
CUDA._cuprint(args...)
end

# GPU lowering of `@printf`: rewrite `__printf` calls inside CUDA kernels to
# the device-side printf implementation.
# NOTE(review): confirm that `CUDA._cuprintf` actually exists and accepts the
# `Val{Symbol}`-wrapped format string — the sibling `@print` path above goes
# through `CUDA._cuprint`, and the `_cuprintf` spelling is unverified here.
@inline function Cassette.overdub(ctx::CUDACtx, ::typeof(__printf), fmt, args...)
    CUDA._cuprintf(fmt, args...)
end

###
# GPU implementation of const memory
###
Expand Down
27 changes: 24 additions & 3 deletions test/print_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,46 @@ if has_cuda_gpu()
CUDA.allowscalar(false)
end

# Empty parametric type; exists only so the printf test can format
# `nameof(Foo)` (a Symbol) with `%s`.
struct Foo{A,B} end

# Kernel exercising the plain `@print` path: each work-item prints its
# global index.
@kernel function kernel_print()
    I = @index(Global)
    @print("Hello from thread ", I, "!\n")
end

# Kernel exercising the `@printf` path: mixes a string argument (`%s`), the
# integer global index (`%d`), and a compile-time Symbol (`%s` via `nameof`).
@kernel function kernel_printf()
    I = @index(Global)
    @printf("Hello printf %s thread %d! type = %s.\n", "from", I, nameof(Foo))
end

# Instantiate `kernel_print` on `backend` with workgroup size 4 and launch it
# over a 4-element ndrange; the last launch's event is returned for `wait`.
function test_print(backend)
    kernel = kernel_print(backend, 4)
    # NOTE(review): the kernel is launched twice here — this looks like a
    # duplicated line picked up by the diff view; confirm whether two
    # launches are intentional.
    kernel(ndrange=(4,))
    kernel(ndrange=(4,))
end

# Instantiate `kernel_printf` on `backend` with workgroup size 4, launch it
# once over a 4-element ndrange, and return the launch event for `wait`.
function test_printf(backend)
    kern = kernel_printf(backend, 4)
    return kern(ndrange=(4,))
end

@testset "print test" begin
    # CPU `@print` path.
    wait(test_print(CPU()))
    @test true

    # CPU `@printf` path.
    wait(test_printf(CPU()))
    @test true

    # GPU paths, only when a CUDA device is available.
    if has_cuda_gpu()
        wait(test_print(CUDADevice()))
        @test true
        wait(test_printf(CUDADevice()))
        @test true
    end

    # NOTE(review): the remainder of this testset appears to interleave
    # pre-PR lines with the new ones in the diff view — the CPU `test_print`
    # call and the host-side `@print` are repeated, and `@printf` below runs
    # on the host rather than inside a kernel. Confirm which version is the
    # intended final state.
    wait(test_print(CPU()))
    @print("Why this should work")
    @test true

    @print("Why this should work")
    @printf("Why this should work")
    @test true
end