In [1]:
using Rays
using CUDA
using CUDAKernels
using KernelAbstractions
using Cthulhu

import Adapt

In [2]:
Adapt.@adapt_structure Rays.Camera

camera = Rays.Camera()
camera = Rays.Camera([CuArray(getfield(camera,f)) for f in fieldnames(Rays.Camera)]...)

Rays.Camera{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}}(Float32[0.0, 0.0, 0.0], Float32[1.0, 0.0, 0.0], Float32[0.0, 0.0, 1.0], Float32[0.0, -1.0, 0.0], Float32[0.1, 0.1], Float32[0.1], [100, 100])

In [3]:
@kernel function shape_view_kernel!(camera, shape, data)::Nothing
    pixel_indices = @index(Global, NTuple)
    V = typeof(camera.loc)

    ray_loc = @private V 3
    ray_dir = @private V 3
    
    Rays.set_ray!(ray_loc, ray_dir)
    # ray_loc, ray_dir = get_ray(camera, pixel_indices)
    # ray = Ryas.Ray(get_ray)
    # int_data = intersect(ray, shape)

    # println(pixel_indices)

    # for data_name in keys(data)
    #     getfield(data)[pixel_indices...] = getfield(int_data, data_name)
    # end

    data.t_int[pixel_indices...] = 1.0

    nothing
end

function shape_view!(
    data::NamedTuple,
    camera::Rays.Camera,
    shape::Rays.Shape)::Nothing

    device = KernelAbstractions.get_device(camera.loc)
    screen_res = collect(camera.screen_res)

    @assert :t_int in keys(data) "data NamedTuple must at least have a key :t_int."

    for (data_name, data_array) in pairs(data)
        @assert device == KernelAbstractions.get_device(data_array) "Array for $data_name data is not on the same device as the camera."
        @assert all(size(data_array) .== screen_res) "Array for $data_name data does not have the same resolution as the camera."
    end

    n = device isa GPU ? 256 : 4
    kernel = shape_view_kernel!(device, n)
    ev = kernel(
        camera, shape, data; ndrange = Tuple(screen_res)
    )
    wait(ev)

    return nothing
end

shape_view! (generic function with 1 method)

In [6]:
Adapt.@adapt_structure Rays.Cube
Adapt.@adapt_structure Rays.Ray

cube = Rays.Cube(CUDA.zeros(Float32, 3), 1.0)
t_int = CUDA.zeros(Float32, collect(camera.screen_res)...)
dim_int = CUDA.zeros(Int, collect(camera.screen_res)...)
data = (; t_int, dim_int)
try
    @device_code_warntype shape_view!(data, camera, cube)
catch err
    code_typed(err; interactive = true)
end

PTX CompilerJob of kernel #gpu_shape_view_kernel!(KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, Rays.Camera{CuDeviceVector{Float32, 1}, CuDeviceVector{Int64, 1}}, Rays.Cube{CuDeviceVector{Float32, 1}}, NamedTuple{(:t_int, :dim_int), Tuple{CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Int64, 1}}}) for sm_75, maxthreads=256



MethodInstance for gpu_shape_view_kernel!(::

KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, ::Rays.Camera{CuDeviceVector{Float32, 1}, CuDeviceVector{Int64, 1}}, ::Rays.Cube{CuDeviceVector{Float32, 1}}, ::NamedTuple{(:t_int, :dim_int), Tuple{CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Int64, 1}}})
  from gpu_shape_view_kernel!([90m__ctx__[39m, [90mcamera[39m, [90mshape[39m, [90mdata[39m)[90m @[39m [90mMain[39m [90m[4mnone:0[24m[39m
Arguments
  #self#

[36m::Core.Const(gpu_shape_view_kernel!)[39m
  __ctx__[36m::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}[39m
  camera[36m::Rays.Camera{CuDeviceVector{Float32, 1}, CuDeviceVector{Int64, 1}}[39m
  shape[36m::Rays.Cube{CuDeviceVector{Float32, 1}}[39m
  data[36m::NamedTuple{(:t_int, :dim_int), Tuple{CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Int64, 1}}}[39m
Locals
  ray_dir[36m::StaticArraysCore.MVector{3, CuDeviceVector{Float32, 1}}[39m
  ray_loc[36m::StaticArraysCore.MVector{3, CuDeviceVector{Float32, 1}}[39m
  V[36m::Type{CuDeviceVector{Float32, 1}}[39m
  pixel_indices[36m::Tuple{Int64, Int64}[39m
Body[36

[90m1 ─[39m %1  = Main.Nothing

[36m::Core.Const(Nothing)[39m
[90m│  [39m       

Core.NewvarNode

(:(ray_dir))
[90m│  [39m       Core.NewvarNode(:(ray_loc))
[90m│  [39m       Core.NewvarNode(:(V))
[90m│  [39m       Core.NewvarNode(:(pixel_indices))
[90m│  [39m %6  = 

(KernelAbstractions.__validindex)(__ctx__)[36m::Bool[39m
[90m└──[39m       goto #3 if not %6
[90m2 ─[39m       (pixel_indices = KernelAbstractions.__index_Global_NTuple(__ctx__))
[90m│  [39m %9  = Base.getproperty(camera, :loc)[36m::CuDeviceVector{Float32, 1}[39m
[90m│  [39m       (V = Main.typeof(%9))
[90m│  [39m %11 = V[36m::Core.Const(CuDeviceVector{Float32, 1})[39m
[90m│  [39m %12 = KernelAbstractions.Val

((3,))[36m::Core.Const(Val{(3,)}())[39m
[90m│  [39m       (ray_loc = (KernelAbstractions.Scratchpad)(__ctx__, %11, %12))
[90m│  [39m %14 = V[36m::Core.Const(CuDeviceVector{Float32, 1})[39m
[90m│  [39m %15 = KernelAbstractions.Val((3,))[36m::Core.Const(Val{(3,)}())[39m
[90m│  [39m       (ray_dir = (KernelAbstractions.Scratchpad)(__ctx__, %14, %15))
[90m│  [39m %17 = Rays.set_ray![36m::Core.Const(Rays.set_ray!)[39m
[90m│  [39m %18 = ray_loc[36m::StaticArraysCore.MVector{3, CuDeviceVector{Float32, 1}}[39m
[90m│  [39m       (%17)(%18, ray_dir)


[90m│  [39m       Core.Const(:(pixel_indices))
[90m│  [39m       Core.Const(:(Base.getproperty(data, :t_int)))
[90m│  [39m       Core.Const(:(Core.tuple(%21, 1.0)))
[90m│  [39m       Core.Const(:(Core._apply_iterate(Base.iterate, Base.setindex!, %22, %20)))
[90m└──[39m       Core.Const(:(Main.nothing))
[90m3 ┄[39m %25 = Base.convert(%1, Main.nothing)[36m::Core.Const(nothing)[39m
[90m│  [39m %26 = Core.typeassert(%25, %1)[36m::Core.Const(nothing)[39m
[90m└──[39m       return %26





GPUCompiler.InvalidIRError: InvalidIRError: compiling kernel #gpu_shape_view_kernel!(KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, Rays.Camera{CuDeviceVector{Float32, 1}, CuDeviceVector{Int64, 1}}, Rays.Cube{CuDeviceVector{Float32, 1}}, NamedTuple{(:t_int, :dim_int), Tuple{CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Int64, 1}}}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to set_ray!)
Stacktrace:
 [1] macro expansion
   @ c:\Users\bart1\Documents\Julia_projects\Rays\examples\Parallel.ipynb:8
 [2] gpu_shape_view_kernel!
   @ C:\Users\bart1\.julia\packages\KernelAbstractions\C8flJ\src\macros.jl:81
 [3] gpu_shape_view_kernel!
   @ .\none:0
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl

In [5]:
# using Images: colorview, Gray

# canvas_grayscale = Rays.cam_is_source(collect(t_int))
# colorview(Gray, canvas_grayscale)

unique(collect(t_int))

1-element Vector{Float32}:
 0.0