# Installing Dependencies

In [190]:
using DelimitedFiles
using CUDA

# Initial Conditions

In [204]:
function init()
    R_Agg = 13
    CUDA.@time global X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float32, header=true)[1][:, 1:3] |> cu

    # Inizializate Variables
    println("Inizializate Variables")
    CUDA.@time global i_Cell = CuArray{Float32}(undef, (size(X, 1), size(X, 1), 3))
    CUDA.@time global Dist = CuArray{Float32}(undef, (size(X, 1), size(X, 1)))
    CUDA.@time global idx = hcat([[CartesianIndex(i,1) for i=1:14] for j=1:size(X,1)]...) |> cu
end
init()

  0.007685 seconds (295.47 k CPU allocations: 7.882 MiB) (1 GPU allocation: 37.969 KiB, 0.13% memmgmt time)
Inizializate Variables
  0.000006 seconds (8 CPU allocations: 256 bytes) (1 GPU allocation: 120.135 MiB, 41.48% memmgmt time)
  0.000003 seconds (8 CPU allocations: 240 bytes) (1 GPU allocation: 40.045 MiB, 64.58% memmgmt time)
  0.030170 seconds (115.17 k CPU allocations: 7.980 MiB) (1 GPU allocation: 708.750 KiB, 0.05% memmgmt time)


14×3240 CuArray{CartesianIndex{2}, 2, CUDA.Mem.DeviceBuffer}:
 CartesianIndex(1, 1)   CartesianIndex(1, 1)   …  CartesianIndex(1, 1)
 CartesianIndex(2, 1)   CartesianIndex(2, 1)      CartesianIndex(2, 1)
 CartesianIndex(3, 1)   CartesianIndex(3, 1)      CartesianIndex(3, 1)
 CartesianIndex(4, 1)   CartesianIndex(4, 1)      CartesianIndex(4, 1)
 CartesianIndex(5, 1)   CartesianIndex(5, 1)      CartesianIndex(5, 1)
 CartesianIndex(6, 1)   CartesianIndex(6, 1)   …  CartesianIndex(6, 1)
 CartesianIndex(7, 1)   CartesianIndex(7, 1)      CartesianIndex(7, 1)
 CartesianIndex(8, 1)   CartesianIndex(8, 1)      CartesianIndex(8, 1)
 CartesianIndex(9, 1)   CartesianIndex(9, 1)      CartesianIndex(9, 1)
 CartesianIndex(10, 1)  CartesianIndex(10, 1)     CartesianIndex(10, 1)
 CartesianIndex(11, 1)  CartesianIndex(11, 1)  …  CartesianIndex(11, 1)
 CartesianIndex(12, 1)  CartesianIndex(12, 1)     CartesianIndex(12, 1)
 CartesianIndex(13, 1)  CartesianIndex(13, 1)     CartesianIndex(13, 1)
 CartesianI

# CUDA kNN

In [205]:
function knn_cu()

    # Definig Variables for calculing knn
    global i_Cell; global Dist; 
    global idx
    
    # Defining Coordinates of each cell on the aggregates
    println("Defining Coordinates of each cell on the aggregates")
    CUDA.@time i_Cell = reshape(repeat(X, size(X ,1)), size(X ,1), size(X ,1), 3) - reshape(repeat(X, inner=(size(X ,1),1)), size(X ,1), size(X ,1), 3)

    # Calculating Norm on every cell on the aggregate
    println("Calculating distances on every cell on the aggregate")
    CUDA.@time Dist = sqrt.(i_Cell[:,:,1] .^ 2 + i_Cell[:,:,2] .^ 2 + i_Cell[:,:,3] .^ 2)

    # Calculating index of knof each cell in the aggregate
    println("Calculating index of knn on each cell in the aggregate")
    CUDA.@time for i = 1:14
        idx[i,:] = findmin(Dist; dims=1)[2]
        Dist[idx[i,:]] .= Inf
    end
end

knn_cu (generic function with 1 method)

In [215]:
for i=1:5
    println("___________________________________ \n")
    knn_cu()
end

___________________________________ 

Defining Coordinates of each cell on the aggregates
  0.003174 seconds (109 CPU allocations: 5.531 KiB) (3 GPU allocations: 360.406 MiB, 0.55% memmgmt time)
Calculating distances on every cell on the aggregate
  0.002891 seconds (227 CPU allocations: 10.719 KiB) (9 GPU allocations: 360.406 MiB, 1.02% memmgmt time)
Calculating index of knn on each cell in the aggregate
  0.010136 seconds (2.35 k CPU allocations: 821.500 KiB) (56 GPU allocations: 2.596 MiB, 1.21% memmgmt time)
___________________________________ 

Defining Coordinates of each cell on the aggregates
  0.002998 seconds (110 CPU allocations: 5.594 KiB) (3 GPU allocations: 360.406 MiB, 0.43% memmgmt time)
Calculating distances on every cell on the aggregate
  0.003476 seconds (226 CPU allocations: 10.422 KiB) (9 GPU allocations: 360.406 MiB, 0.56% memmgmt time)
Calculating index of knn on each cell in the aggregate
  0.009204 seconds (2.30 k CPU allocations: 818.266 KiB) (56 GPU allocati