# Installing Dependencies

In [15]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using LinearAlgebra: norm
using CUDA
using BenchmarkTools: @btime

# Initial Conditions

In [40]:
R_Agg = 16
X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float64, header=true)[1][:, 1:3]
X_Cu = X |> cu

6064×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -17.98  -2.89  -15.51
 -15.98  -2.89  -15.51
 -13.98  -2.89  -15.51
 -18.98  -1.15  -15.51
 -16.98  -1.15  -15.51
 -14.98  -1.15  -15.51
 -12.98  -1.15  -15.51
 -19.98   0.58  -15.51
 -17.98   0.58  -15.51
 -15.98   0.58  -15.51
   ⋮            
  16.98  -0.58   15.51
  18.98  -0.58   15.51
  13.98   1.15   15.51
  15.98   1.15   15.51
  17.98   1.15   15.51
  19.98   1.15   15.51
  14.98   2.89   15.51
  16.98   2.89   15.51
  18.98   2.89   15.51

In [75]:
CUDA.@time reshape(repeat(X, size(X ,1)), size(X ,1), size(X ,1), 3)

  0.237692 seconds (8 CPU allocations: 841.647 MiB, 13.01% gc time)


6064×6064×3 Array{Float64, 3}:
[:, :, 1] =
 -17.98  -17.98  -17.98  -17.98  -17.98  …  -17.98  -17.98  -17.98  -17.98
 -15.98  -15.98  -15.98  -15.98  -15.98     -15.98  -15.98  -15.98  -15.98
 -13.98  -13.98  -13.98  -13.98  -13.98     -13.98  -13.98  -13.98  -13.98
 -18.98  -18.98  -18.98  -18.98  -18.98     -18.98  -18.98  -18.98  -18.98
 -16.98  -16.98  -16.98  -16.98  -16.98     -16.98  -16.98  -16.98  -16.98
 -14.98  -14.98  -14.98  -14.98  -14.98  …  -14.98  -14.98  -14.98  -14.98
 -12.98  -12.98  -12.98  -12.98  -12.98     -12.98  -12.98  -12.98  -12.98
 -19.98  -19.98  -19.98  -19.98  -19.98     -19.98  -19.98  -19.98  -19.98
 -17.98  -17.98  -17.98  -17.98  -17.98     -17.98  -17.98  -17.98  -17.98
 -15.98  -15.98  -15.98  -15.98  -15.98     -15.98  -15.98  -15.98  -15.98
   ⋮                                     ⋱    ⋮                     
  16.98   16.98   16.98   16.98   16.98  …   16.98   16.98   16.98   16.98
  18.98   18.98   18.98   18.98   18.98      18.98   18.98   18

# CUDA kNN

## Starting kNN CUDA

In [72]:
CUDA.@time i_Cell = reshape(repeat(X_Cu, size(X ,1)), size(X ,1), size(X ,1), 3)

  0.003328 seconds (69 CPU allocations: 3.812 KiB) (1 GPU allocation: 420.823 MiB, 0.34% memmgmt time)


6064×6064×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 -17.98  -17.98  -17.98  -17.98  -17.98  …  -17.98  -17.98  -17.98  -17.98
 -15.98  -15.98  -15.98  -15.98  -15.98     -15.98  -15.98  -15.98  -15.98
 -13.98  -13.98  -13.98  -13.98  -13.98     -13.98  -13.98  -13.98  -13.98
 -18.98  -18.98  -18.98  -18.98  -18.98     -18.98  -18.98  -18.98  -18.98
 -16.98  -16.98  -16.98  -16.98  -16.98     -16.98  -16.98  -16.98  -16.98
 -14.98  -14.98  -14.98  -14.98  -14.98  …  -14.98  -14.98  -14.98  -14.98
 -12.98  -12.98  -12.98  -12.98  -12.98     -12.98  -12.98  -12.98  -12.98
 -19.98  -19.98  -19.98  -19.98  -19.98     -19.98  -19.98  -19.98  -19.98
 -17.98  -17.98  -17.98  -17.98  -17.98     -17.98  -17.98  -17.98  -17.98
 -15.98  -15.98  -15.98  -15.98  -15.98     -15.98  -15.98  -15.98  -15.98
   ⋮                                     ⋱    ⋮                     
  16.98   16.98   16.98   16.98   16.98  …   16.98   16.98   16.98   16.98
  18.98   18.98   18.98   18.98   18.9

In [52]:
CUDA.@time n_Cell = permutedims(i_Cell, (2,1,3))

  0.011813 seconds (68 CPU allocations: 3.812 KiB) (1 GPU allocation: 420.823 MiB, 7.06% memmgmt time)


6064×6064×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 -17.98  -15.98  -13.98  -18.98  -16.98  …  17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98  …  17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
 -17.98  -15.98  -13.98  -18.98  -16.98     17.98  19.98  14.98  16.98  18.98
   ⋮                                     ⋱          ⋮                   
 -17.98  -15.98  -13.98  -18.98  -16.98  …  17.98  19.98  14.98  16.98  18.98
 

In [56]:
CUDA.@time dist_i_Cell = i_Cell - n_Cell

  0.006964 seconds (64 CPU allocations: 3.688 KiB) (1 GPU allocation: 420.823 MiB, 0.14% memmgmt time)


6064×6064×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
  0.0   -2.0   -4.0    1.0   -1.0   -3.0   …  -37.96  -32.96  -34.96  -36.96
  2.0    0.0   -2.0    3.0    1.0   -1.0      -35.96  -30.96  -32.96  -34.96
  4.0    2.0    0.0    5.0    3.0    1.0      -33.96  -28.96  -30.96  -32.96
 -1.0   -3.0   -5.0    0.0   -2.0   -4.0      -38.96  -33.96  -35.96  -37.96
  1.0   -1.0   -3.0    2.0    0.0   -2.0      -36.96  -31.96  -33.96  -35.96
  3.0    1.0   -1.0    4.0    2.0    0.0   …  -34.96  -29.96  -31.96  -33.96
  5.0    3.0    1.0    6.0    4.0    2.0      -32.96  -27.96  -29.96  -31.96
 -2.0   -4.0   -6.0   -1.0   -3.0   -5.0      -39.96  -34.96  -36.96  -38.96
  0.0   -2.0   -4.0    1.0   -1.0   -3.0      -37.96  -32.96  -34.96  -36.96
  2.0    0.0   -2.0    3.0    1.0   -1.0      -35.96  -30.96  -32.96  -34.96
  ⋮                                  ⋮     ⋱    ⋮                     
 34.96  32.96  30.96  35.96  33.96  31.96  …   -3.0     2.0     0.0    -2.0
 36.96  34.96  

In [58]:
CUDA.@time Dist = sqrt.(dist_i_Cell[:,:,1] .^ 2 + dist_i_Cell[:,:,2] .^ 2 + dist_i_Cell[:,:,3] .^ 2)

  0.016868 seconds (226 CPU allocations: 10.359 KiB) (9 GPU allocations: 1.233 GiB, 15.42% memmgmt time)


6064×6064 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.0       2.0       4.0       2.00689  …  45.6291   47.0941   48.5972
  2.0       0.0       2.0       3.46808     44.206    45.6291   47.0941
  4.0       2.0       0.0       5.29411     42.8291   44.206    45.6291
  2.00689   3.46808   5.29411   0.0         46.1719   47.6622   49.1887
  2.00689   2.00689   3.46808   2.0         44.7214   46.1719   47.6622
  3.46808   2.00689   2.00689   4.0      …  43.3147   44.7214   46.1719
  5.29411   3.46808   2.00689   6.0         41.9562   43.3147   44.7214
  4.00511   5.29537   6.93115   1.99822     46.7951   48.3075   49.8544
  3.47      4.00511   5.29537   1.99822     45.3204   46.7951   48.3075
  4.00511   3.47      4.00511   3.46308     43.8873   45.3204   46.7951
  ⋮                                      ⋱                      
 46.7951   45.3204   43.8873   47.4941   …   4.00511   3.47      4.00511
 48.3075   46.7951   45.3204   49.0258       5.29537   4.00511   3.47
 44.7214   43.3147

## Finding each index in kNN CUDA

In [7]:
getindex(Dist,1)

UndefVarError: UndefVarError: Dist not defined