# Installing Dependencies

In [4]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using CUDA

# Using kNN

In [6]:
@time function NearNeighbor(X, t, t_knn)
    # Using kNN for Nearest Neighbors
    if t%t_knn | t == 0
        kdtree = KDTree(X')
    end 

    # Loop over all cells to compute displacements
    index, _ = knn(kdtree, X[1,:], 14, true)
    for i in 2:size(X)[1]
        # Scan neighbours
        global idxs, _ = knn(kdtree, X[i,:], 14, true)
        index = hcat(index, idxs)

    end
    return index
end

  0.000049 seconds (30 allocations: 2.450 KiB)


NearNeighbor (generic function with 1 method)

# Initial Conditions

In [7]:
R_Agg = 16
t, t_knn = 0, 10
r_max, s = 3, 1
K = 1

@time X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float64, header=true)[1][:, 1:3]
@time idx = NearNeighbor(X, t, t_knn)

  0.453860 seconds (2.09 M allocations: 96.446 MiB, 4.98% gc time, 96.39% compilation time)


  1.009674 seconds (4.73 M allocations: 2.172 GiB, 7.19% gc time, 77.75% compilation time)


14×6064 Matrix{Int64}:
   1    2    3    4    5    6    7  …  6059  6060  6061  6062  6063  6064
  25   26   27   40   42   43   44     6024  6025  6026  6038  6039  6040
  33   33   35   41   41   42   43     6023  6024  6025  6030  6031  6033
  32   34   34    9    9   10   11     6055  6057  6057  6031  6032  6032
   2    3    2    8   10   11    6     6056  6056  6060  6063  6064  6063
   4    1    6    5    4    5   35  …  6058  6061  6033  6059  6062  6061
   5    6    7   32    6    7    3     6060  6059  6064  6058  6060  6060
  24    5   26    1   33   34   51     6031  6032  6017  6037  6059  6039
  26   25   28   48    2    3   34     6062  6063  6034  6039  6038  6041
  41   27   43   33    1    2   36     6063  6064  6032  6023  6040  6025
 102   42  104   31   49   50  114  …  6015  6016  5954  5962  6024  5964
  20  103   22  111   34   35   52     6030  6033  6018  6045  5963  6047
  19   20   21   47   32   33   50     6032  6031  6016  6044  6045  6046
  42   21   44 

# CUDA Forces

## Define Variables for CUDA

In [8]:
@time X_Cu = X |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = idx |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

# Joining the Function

In [158]:
function main()

    # Definig Variables for calculing dX
    global X_Cu; global dX_Cu; global idx_Cu

    # Finding Distances
    println("Finding Distances")
    CUDA.@time r = reshape(repeat(X_Cu, inner=(14,1)), 14, size(X_Cu)[1], 3) - X_Cu[idx_Cu,:]

    # Finding Distances/Norm
    println("Finding Distances/Norm")
    CUDA.@time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)
    CUDA.@time dist = reshape(repeat((dist), outer=(1,3)) ,14 ,size(X_Cu)[1], 3)
    
    # Normalizationg Distances
    println("Normalizationg Distances")
    CUDA.@time r = r ./ dist

    # dX[i,:] +=  r/dist * F
    println("Calculationg Forces")
    CUDA.@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r

    # Deleting Forces greater than R_Max
    println("Deleting Forces greater than R_Max")
    CUDA.@time F[dist .>  r_max] .= 0

    # Calculating de dX
    println("Calculationg dX")
    CUDA.@time dX_Cu[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
    CUDA.@time dX_Cu[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
    CUDA.@time dX_Cu[:,3] = sum(F[:,:,3][2:end,:]; dims=1)

    # Euler Step for all Cells
    dX_Cu
end

main (generic function with 1 method)

In [156]:
main()

Finding Distances
  0.011464 seconds (54 CPU allocations: 2.438 KiB) (3 GPU allocations: 2.915 MiB, 0.42% memmgmt time)
Finding Distances/Norm
  0.000752 seconds (225 CPU allocations: 10.422 KiB) (9 GPU allocations: 3.239 MiB, 2.10% memmgmt time)
  0.000133 seconds (21 CPU allocations: 960 bytes) (1 GPU allocation: 1.943 MiB, 3.23% memmgmt time)
Normalizationg Distances
  0.000475 seconds (67 CPU allocations: 3.812 KiB) (1 GPU allocation: 1.943 MiB, 0.53% memmgmt time)
Calculationg Forces
  0.000432 seconds (140 CPU allocations: 12.391 KiB) (1 GPU allocation: 1.943 MiB, 1.07% memmgmt time)
Deleting Forces greater than R_Max
  0.001237 seconds (318 CPU allocations: 215.562 KiB) (4 GPU allocations: 2.385 MiB, 1.60% memmgmt time)
Calculationg dX
  0.000089 seconds (73 CPU allocations: 3.188 KiB) (3 GPU allocations: 1.295 MiB, 9.10% memmgmt time)
  0.000064 seconds (73 CPU allocations: 3.188 KiB) (3 GPU allocations: 1.295 MiB, 8.89% memmgmt time)
  0.000060 seconds (73 CPU allocations: 3.1

6064×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  1.0           1.72299      2.55151
 -1.38778f-17   1.72299      2.55151
 -1.0           1.72299      2.55151
  1.49483       0.872724     2.55154
  0.0           0.0117228    2.55154
  0.0           0.0117228    2.55154
 -1.99616       0.00542035   2.55154
  2.00266      -0.00490577   2.54855
  0.0          -0.00490577   2.54855
  0.0          -0.00490577   2.54855
  ⋮                         
  0.0           0.00490577  -2.54855
 -1.50133       0.872209    -2.54855
  1.49483      -0.872724    -2.55154
  0.0          -0.0117228   -2.55154
  0.0          -0.0117228   -2.55154
 -1.99616      -0.00542035  -2.55154
  1.0          -1.72299     -2.55151
 -1.38778f-17  -1.72299     -2.55151
 -1.0          -1.72299     -2.55151