# Installing Dependencies

In [1]:
using DelimitedFiles
using CUDA

# Initial Conditions

In [2]:
function init()
    R_Agg = 13
    CUDA.@time global X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float32, header=true)[1][:, 1:3] |> cu

    # Inizializate Variables
    println("Inizializate Variables")
    CUDA.@time global i_Cell = CuArray{Float32}(undef, (size(X, 1), size(X, 1), 3))
    CUDA.@time global Dist = CuArray{Float32}(undef, (size(X, 1), size(X, 1)))
    CUDA.@time global idx = hcat([[CartesianIndex(i,1) for i=1:14] for j=1:size(X,1)]...) |> cu
end
init()

  0.472864 seconds (994.41 k CPU allocations: 

45.484 MiB, 2.16% gc time) (1 GPU allocation: 

37.969 KiB, 11.66% memmgmt time)
Inizializate Variables
  0.003541 seconds (1.41 k CPU allocations: 80.974 KiB) (1 GPU allocation: 120.135 MiB, 12.95% memmgmt time)
  0.000229 seconds (8 CPU allocations: 240 bytes) (1 GPU allocation: 40.045 MiB, 97.68% memmgmt time)


  5.237972 seconds (795.78 k CPU allocations: 44.407 MiB, 0.16% gc time) (1 GPU allocation: 708.750 KiB, 0.00% memmgmt time)


14×3240 CuArray{CartesianIndex{2}, 2, CUDA.Mem.DeviceBuffer}:
 CartesianIndex(1, 1)   CartesianIndex(1, 1)   …  CartesianIndex(1, 1)
 CartesianIndex(2, 1)   CartesianIndex(2, 1)      CartesianIndex(2, 1)
 CartesianIndex(3, 1)   CartesianIndex(3, 1)      CartesianIndex(3, 1)
 CartesianIndex(4, 1)   CartesianIndex(4, 1)      CartesianIndex(4, 1)
 CartesianIndex(5, 1)   CartesianIndex(5, 1)      CartesianIndex(5, 1)
 CartesianIndex(6, 1)   CartesianIndex(6, 1)   …  CartesianIndex(6, 1)
 CartesianIndex(7, 1)   CartesianIndex(7, 1)      CartesianIndex(7, 1)
 CartesianIndex(8, 1)   CartesianIndex(8, 1)      CartesianIndex(8, 1)
 CartesianIndex(9, 1)   CartesianIndex(9, 1)      CartesianIndex(9, 1)
 CartesianIndex(10, 1)  CartesianIndex(10, 1)     CartesianIndex(10, 1)
 CartesianIndex(11, 1)  CartesianIndex(11, 1)  …  CartesianIndex(11, 1)
 CartesianIndex(12, 1)  CartesianIndex(12, 1)     CartesianIndex(12, 1)
 CartesianIndex(13, 1)  CartesianIndex(13, 1)     CartesianIndex(13, 1)
 CartesianI

# CUDA kNN

In [3]:
function knn_cu()

    # Definig Variables for calculing knn
    global i_Cell; global Dist; 
    global idx
    
    # Defining Coordinates of each cell on the aggregates
    println("Defining Coordinates of each cell on the aggregates")
    CUDA.@time i_Cell = reshape(repeat(X, size(X ,1)), size(X ,1), size(X ,1), 3) - reshape(repeat(X, inner=(size(X ,1),1)), size(X ,1), size(X ,1), 3)

    # Calculating Norm on every cell on the aggregate
    println("Calculating distances on every cell on the aggregate")
    CUDA.@time Dist = sqrt.(i_Cell[:,:,1] .^ 2 + i_Cell[:,:,2] .^ 2 + i_Cell[:,:,3] .^ 2)

    # Calculating index of knof each cell in the aggregate
    println("Calculating index of knn on each cell in the aggregate")
    CUDA.@time for i = 1:14
        idx[i,:] = findmin(Dist; dims=1)[2]
        Dist[idx[i,:]] .= Inf
    end
end

knn_cu (generic function with 1 method)

In [5]:
for i=1:5
    println("___________________________________ \n")
    knn_cu()
end

___________________________________ 

Defining Coordinates of each cell on the aggregates
  0.002947 seconds (109 CPU allocations: 5.531 KiB) (3 GPU allocations: 360.406 MiB, 22.64% memmgmt time)
Calculating distances on every cell on the aggregate
  0.002991 seconds (226 CPU allocations: 10.422 KiB) (9 GPU allocations: 360.406 MiB, 24.29% memmgmt time)
Calculating index of knn on each cell in the aggregate
  0.009656 seconds (2.30 k CPU allocations: 818.266 KiB) (56 GPU allocations: 2.596 MiB, 1.25% memmgmt time)
___________________________________ 

Defining Coordinates of each cell on the aggregates
  0.002925 seconds (110 CPU allocations: 5.594 KiB) (3 GPU allocations: 360.406 MiB, 35.68% memmgmt time)
Calculating distances on every cell on the aggregate
  0.003537 seconds (226 CPU allocations: 10.422 KiB) (9 GPU allocations: 360.406 MiB, 25.53% memmgmt time)
Calculating index of knn on each cell in the aggregate
  0.009811 seconds (2.30 k CPU allocations: 818.562 KiB) (56 GPU allo

  0.009289 seconds (2.35 k CPU allocations: 833.672 KiB) (56 GPU allocations: 2.596 MiB, 1.10% memmgmt time)


In [23]:
X[getindex.(idx,1),:]

14×3240×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 -13.89  -11.89  -16.89  -14.89  -12.89  …  11.89  13.89  15.89  12.89  14.89
 -14.89  -12.89  -15.89  -13.89  -13.89     10.89  12.89  16.89  11.89  13.89
 -12.89  -10.89  -14.89  -15.89  -11.89     12.89  14.89  14.89  13.89  15.89
 -11.89  -13.89  -15.89  -13.89  -13.89     12.89  12.89  14.89  14.89  12.89
 -13.89  -11.89  -17.89  -16.89  -11.89      9.89  14.89  13.89  12.89  14.89
 -14.89  -12.89  -16.89  -12.89  -14.89  …  13.89  11.89  16.89  11.89  13.89
 -12.89  -10.89  -16.89  -15.89  -10.89     10.89  15.89  14.89  13.89  15.89
 -15.89  -13.89  -14.89  -13.89  -13.89     12.89  12.89  15.89  10.89  16.89
 -11.89   -9.89  -18.89  -14.89  -11.89     11.89  14.89  15.89  14.89  12.89
 -13.89  -11.89  -16.89  -14.89  -12.89     11.89  13.89  17.89  12.89  14.89
 -13.89  -11.89  -16.89  -16.89  -12.89  …   9.89  13.89  13.89  12.89  14.89
 -13.89  -11.89  -13.89  -12.89  -14.89     13.89  11.89  15.89  12.89  14.8