# Dependencies

In [16]:
include("../../src/struct_data.jl")
include("../../src/neighbor.jl")

nearest_neighbors (generic function with 1 method)

# Initial Data

In [17]:
# Apply the project macro `@make_struct_func` to the `Cubic` type.
# NOTE(review): the macro is not defined in this notebook; presumably it comes
# from the included struct_data.jl — confirm what it generates.
@make_struct_func Cubic

# Notebook-global model configuration, assembled from four parameter groups:
# time, neighbor search, geometry and simulation/I-O settings. Later cells read
# this binding when building the aggregate.
Model = ModelPar(
    # Time settings. Presumably `t_f` is the final time and `dt` the step size —
    # TODO confirm meaning and units against the struct definition.
    TimePar(
        t_f      = 10000.0,
        dt       = 0.5
    ),
    # Neighbor-search setting; `n_knn` is presumably the number of nearest
    # neighbors kept per cell — TODO confirm.
    NeighborPar(n_knn = 50),
    # Geometry settings; `position` is written as a 1×3 row matrix (`[0 0 0]`),
    # not a 3-element vector.
    GeometryPar(
        r_agg       = 10.0,
        position    = [0 0 0],
        outer_ratio = 0.8
    ),
    # Simulation / I-O settings.
    # NOTE(review): `path_ouput` (sic) is the keyword spelling used here; it
    # presumably has to match the field name declared by SimulationPar, so do
    # not "fix" it in this cell alone.
    SimulationPar(
        n_text = 100,
        path_input = "../../data/init/Sphere",
        path_ouput = "AAA",
        name_cell = "Queso"
    )
)

ModelPar(TimePar(10000.0, 0.5), NeighborPar(50), GeometryPar(10.0, [0 0 0], 0.8), SimulationPar(100, "../../data/init/Sphere", "AAA", "Queso"))

In [18]:
# Build the notebook-global aggregate `AGG` from the model configuration plus a
# `Cubic` and a `ContractilePar` parameter set, and report the wall time and
# allocations of the construction with `@time`.
# NOTE(review): the recorded output echoes `Aggregate(0.0, ...)` although 1.0 is
# passed as the first argument here — confirm how the constructor uses it.
@time AGG = Aggregate(1.0, Model, Cubic(1.0,2.0,3.0), ContractilePar(1))

  0.100202 seconds (79.66 k allocations: 4.556 MiB)


Aggregate(0.0, ModelPar(TimePar(10000.0, 0.5), NeighborPar(50), GeometryPar(10.0, [0 0 0], 0.8), SimulationPar(100, "../../data/init/Sphere", "AAA", "Queso")), Cubic{Float64}(1.0, 2.0, 3.0), ContractilePar{Int64}(1), PositionAgg(Float32[-1.5 -2.89 -8.98; 0.5 -2.89 -8.98; … ; 0.5 2.89 8.98; 2.5 2.89 8.98], Float32[0.0 0.0 0.0; 0.0 0.0 0.0; … ; 0.0 0.0 0.0; 0.0 0.0 0.0]), IndexCell{Int64}([1; 1; … ; 1; 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), NeighborAgg(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Float32[0.0 0.0 … 0.0 0.0], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]), ForceAgg(Float32[0.0 0.0 0.0; 0.0 0.0 0.0; … ; 0.0 0.0 0.0; 0.0 0.0 0.0]))

# Program

## Idx

In [19]:
# Idx cell: print the problem sizes, launch `dist_kernel!` on the GPU to fill
# `AGG.Neighbor.idx`, and display the result.
println("------------------------ SIZE ------------------------")
println("r_max         = $(AGG.ForcePar.rₘₐₓ)")
println("Size X        = $(size(AGG.Position.X))")
println("Size idx      = $(size(AGG.Neighbor.idx))")

println("--------------------- SIZE CUDA ----------------------")
# 2-D launch configuration: 32×32 threads per block; the block count in each
# dimension is the ceiling division of the number of rows of X by the matching
# thread count (`cld.` broadcasts over the `threads` tuple).
threads =(32,32)
blocks  =cld.(size(AGG.Position.X,1),threads)
println("Threads = $(threads)")
println("Blocks  = $(blocks)")
println("--------------------- CUDA SOLVER --------------------")
# Launch the kernel and time it with `CUDA.@time`.
# NOTE(review): judging by the recorded output, entry (i, j) of `idx` ends up
# holding i for some pairs and 0 otherwise — presumably "i when cells i and j
# are within r_max"; confirm against dist_kernel! in neighbor.jl.
CUDA.@time @cuda threads=threads blocks=blocks dist_kernel!(AGG.Neighbor.idx, AGG.Position.X ,AGG.ForcePar.rₘₐₓ)
println("------------------------ IDX -------------------------")
# `idx_real` is a second name for the same array (no copy is made), so it will
# reflect any later in-place change to `AGG.Neighbor.idx`.
idx_real = AGG.Neighbor.idx
display(AGG.Neighbor.idx)


------------------------ SIZE ------------------------
r_max         = 3.0
Size X        = (746, 3)
Size idx      = (746, 746)
--------------------- SIZE CUDA ----------------------
Threads = (32, 32)
Blocks  = (24, 24)
--------------------- CUDA SOLVER --------------------
  0.279528 seconds (70.46 k CPU allocations: 4.896 MiB)
------------------------ IDX -------------------------


746×746 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 1.0  1.0  0.0  1.0   1.0   0.0   0.0  …    0.0    0.0    0.0    0.0    0.0
 2.0  2.0  2.0  0.0   2.0   2.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  3.0  3.0  0.0   0.0   3.0   3.0       0.0    0.0    0.0    0.0    0.0
 4.0  0.0  0.0  4.0   4.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 5.0  5.0  0.0  5.0   5.0   5.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  6.0  6.0  0.0   6.0   6.0   6.0  …    0.0    0.0    0.0    0.0    0.0
 0.0  0.0  7.0  0.0   0.0   7.0   7.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  8.0   0.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  9.0   9.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0  10.0  10.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0   0.0  11.0  11.0  …    0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0   0.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0   0.0   0.0   0.

# Reduce

In [37]:
# Reduce cell: print the sizes of the neighbor arrays, show the current `idx`
# matrix, then launch `reduce_kernel` on the GPU and display the two arrays it
# is given as outputs (`idx_red`, `idx_sum`).
println("------------------------ SIZE ------------------------")
println("Size idx_real  = $(size(AGG.Neighbor.idx))")
println("Size idx_sum   = $(size(AGG.Neighbor.idx_sum))")
println("Size idx_red   = $(size(AGG.Neighbor.idx_red))")
println("------------------------ IDX -------------------------")
display(AGG.Neighbor.idx)
println("--------------------- SIZE CUDA ----------------------")
# 1-D launch configuration: 1024 threads per block; the block count is the
# ceiling division of the number of rows of `idx` by the thread count
# (`threads` is a scalar here, so the broadcast `cld.` yields a scalar).
threads=1024
blocks=cld.(size(AGG.Neighbor.idx,1),threads)
println("Threads = $(threads)")
println("Blocks  = $(blocks)")
# Short aliases for the kernel arguments; these are the same arrays, not copies.
A = AGG.Neighbor.idx
B = AGG.Neighbor.idx_red
C = AGG.Neighbor.idx_sum
println("--------------------- CUDA SOLVER --------------------")
# NOTE(review): the recorded output shows this launch failing at compile time
# with InvalidIRError; the trace points at a `setindex!` in reduce_kernel
# (neighbor.jl:79) that falls into Base's invalid-index error path — presumably
# a non-integer (Float32) value is used as an index. The fix belongs in the
# kernel, not in this cell — TODO confirm.
@cuda threads=threads blocks=blocks reduce_kernel(A,B,C)
display(AGG.Neighbor.idx_red)
display(AGG.Neighbor.idx_sum)

------------------------ SIZE ------------------------
Size idx_real  = (746, 746)
Size idx_sum   = (1, 746)
Size idx_red   = (21, 746)
------------------------ IDX -------------------------


746×746 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 1.0  1.0  0.0  1.0   1.0   0.0   0.0  …    0.0    0.0    0.0    0.0    0.0
 2.0  2.0  2.0  0.0   2.0   2.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  3.0  3.0  0.0   0.0   3.0   3.0       0.0    0.0    0.0    0.0    0.0
 4.0  0.0  0.0  4.0   4.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 5.0  5.0  0.0  5.0   5.0   5.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  6.0  6.0  0.0   6.0   6.0   6.0  …    0.0    0.0    0.0    0.0    0.0
 0.0  0.0  7.0  0.0   0.0   7.0   7.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  8.0   0.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  9.0   9.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0  10.0  10.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0   0.0  11.0  11.0  …    0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0   0.0   0.0   0.0       0.0    0.0    0.0    0.0    0.0
 0.0  0.0  0.0  0.0   0.0   0.0   0.

--------------------- SIZE CUDA ----------------------
Threads = 1024
Blocks  = 1
--------------------- CUDA SOLVER --------------------


LoadError: InvalidIRError: compiling kernel #reduce_kernel(CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to var"#sprint#385"(context, sizehint::Integer, ::typeof(sprint), f::Function, args...) in Base at strings/io.jl:100)
Stacktrace:
 [1] #repr#386
   @ ./strings/io.jl:219
 [2] limitrepr
   @ ./strings/io.jl:221
 [3] to_index
   @ ./indices.jl:300
 [4] to_index
   @ ./indices.jl:277
 [5] to_indices
   @ ./indices.jl:333
 [6] to_indices
   @ ./indices.jl:324
 [7] setindex!
   @ ./abstractarray.jl:1267
 [8] reduce_kernel
   @ ~/Documentos/Maestria-GitHub/CellAggregate.jl/src/neighbor.jl:79
Reason: unsupported call through a literal pointer (call to )
Stacktrace:
 [1] _string_n
   @ ./strings/string.jl:74
 [2] StringVector
   @ ./iobuffer.jl:31
 [3] #IOBuffer#361
   @ ./iobuffer.jl:114
 [4] print_to_string
   @ ./strings/io.jl:133
Reason: unsupported call through a literal pointer (call to )
Stacktrace:
 [1] unsafe_wrap
   @ ./strings/string.jl:85
 [2] StringVector
   @ ./iobuffer.jl:31
 [3] #IOBuffer#361
   @ ./iobuffer.jl:114
 [4] print_to_string
   @ ./strings/io.jl:133
Reason: unsupported call through a literal pointer (call to )
Stacktrace:
 [1] fill!
   @ ./array.jl:406
 [2] #IOBuffer#361
   @ ./iobuffer.jl:121
 [3] print_to_string
   @ ./strings/io.jl:133
Reason: unsupported dynamic function invocation (call to print)
Stacktrace:
 [1] print_to_string
   @ ./strings/io.jl:135
Reason: unsupported call through a literal pointer (call to )
Stacktrace:
 [1] _growend!
   @ ./array.jl:888
 [2] resize!
   @ ./array.jl:1108
 [3] print_to_string
   @ ./strings/io.jl:137
Reason: unsupported call through a literal pointer (call to )
Stacktrace:
 [1] _deleteend!
   @ ./array.jl:897
 [2] resize!
   @ ./array.jl:1113
 [3] print_to_string
   @ ./strings/io.jl:137
Reason: unsupported call through a literal pointer (call to )
Stacktrace:
 [1] String
   @ ./strings/string.jl:53
 [2] print_to_string
   @ ./strings/io.jl:137
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl

In [28]:
# Inspect the `idx_sum` array; as the last expression of the cell, its value is
# what the notebook displays. The recorded output shows it still all zeros.
AGG.Neighbor.idx_sum

1×746 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0