# Loading Dependencies

In [3]:
using CUDA
using DelimitedFiles

# Defining kNN

In [5]:
"""
    CukNN(k, i, X)

Compute, on the GPU, the Euclidean distance from row `i` of `X` to every row of `X`.

Only the first three columns of `X` are used as coordinates.

# Arguments
- `k`: requested number of neighbours. Currently unused: no sorting or selection is
  done here, so the caller has to pick the `k` smallest distances from the result.
- `i`: row index of the query point in `X`.
- `X`: matrix with one point per row and at least three columns.

# Returns
An `n×2` device array (`n = size(X, 1)`): column 1 holds the row index, column 2 the
distance from row `i` to that row. Rows are in the original order (not sorted).

# Throws
- `ArgumentError` if `X` has fewer than three columns.
"""
function CukNN(k, i, X)
    THREADS_PER_BLOCK = 256
    size(X, 2) >= 3 ||
        throw(ArgumentError("X must have at least 3 columns, got $(size(X, 2))"))

    X_Cu = X |> cu
    n = size(X_Cu, 1)
    # Only the query row itself is needed on the device; replicating it n times
    # would just cost memory and bandwidth.
    Xi_Cu = X_Cu[i, :]
    dist = hcat(1:n, zeros(Float32, n)) |> cu

    # Kernel: one thread per row of `pts`, writing that row's distance to `query`.
    function rowdist!(dist, pts, query)
        row = (blockIdx().x - 1) * blockDim().x + threadIdx().x
        # The last block is usually only partly used; surplus threads must not
        # touch memory, especially since bounds checks are disabled below.
        if row <= size(dist, 1)
            @inbounds begin
                dx = pts[row, 1] - query[1]
                dy = pts[row, 2] - query[2]
                dz = pts[row, 3] - query[3]
                # `sqrt` keeps the arithmetic in the element type; `^ 0.5` would
                # promote to Float64 inside the kernel.
                dist[row, 2] = sqrt(dx^2 + dy^2 + dz^2)
            end
        end
        return
    end

    threads = THREADS_PER_BLOCK
    # One thread per row: size the grid from the row count, not from
    # `length(dist)`, which counts both columns.
    blocks = cld(n, threads)
    @cuda threads=threads blocks=blocks rowdist!(dist, X_Cu, Xi_Cu)
    return dist
end

CukNN (generic function with 1 method)

# Initial Conditions

In [7]:
# Number that selects which initial-condition file is read
# (presumably the aggregate radius — TODO confirm).
R_Agg = 16
# With `header=true`, `readdlm` returns a `(data, header)` tuple; keep the data part
# and retain only its first three columns.
X = first(
    readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float64; header=true),
)[:, 1:3];

# CUDA kNN

In [13]:
# Time a single call with k = 10 and query row i = 1 on the `X` loaded earlier.
# NOTE(review): CUDA.jl documents `@cuda` launches as asynchronous, so this `@time`
# may not include kernel execution; wrapping the call in `CUDA.@sync` would be
# needed to time the kernel itself — confirm.
@time CukNN(10, 1, X)

  0.018563 seconds (66.84 k allocations: 3.569 MiB)


6064×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
    1.0   0.0
    2.0   2.0
    3.0   4.0
    4.0   2.00689
    5.0   2.00689
    6.0   3.46808
    7.0   5.29411
    8.0   4.00511
    9.0   3.47
   10.0   4.00511
    ⋮    
 6056.0  46.7951
 6057.0  48.3075
 6058.0  44.7214
 6059.0  46.1719
 6060.0  47.6622
 6061.0  49.1887
 6062.0  45.6291
 6063.0  47.0941
 6064.0  48.5972