# Installing Dependencies

In [2]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using LinearAlgebra: norm
using CUDA

# Using kNN

In [4]:
@time function NearNeighbor(X, t, t_knn)
    # Using kNN for Nearest Neighbors
    if t%t_knn | t == 0
        global kdtree = KDTree(X')
    end 

    # Loop over all cells to compute displacements
    index, _ = knn(kdtree, X[1,:], 14, true)
    for i in 2:size(X)[1]
        # Scan neighbours
        global idxs, _ = knn(kdtree, X[i,:], 14, true)
        index = hcat(index, idxs)

    end
    return index
end

  0.000050 seconds (30 allocations: 2.421 KiB)


NearNeighbor (generic function with 1 method)

# Initial Conditions

In [7]:
R_Agg = 16
t, t_knn = 0, 10

@time X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float64, header=true)[1][:, 1:3]
@time idx = NearNeighbor(X, t, t_knn)

  0.019684 seconds (565.10 k allocations: 15.185 MiB)


  0.221128 seconds (36.27 k allocations: 1.922 GiB, 7.99% gc time)


14×6064 Matrix{Int64}:
   1    2    3    4    5    6    7  …  6059  6060  6061  6062  6063  6064
  25   26   27   40   42   43   44     6024  6025  6026  6038  6039  6040
  33   33   35   41   41   42   43     6023  6024  6025  6030  6031  6033
  32   34   34    9    9   10   11     6055  6057  6057  6031  6032  6032
   2    3    2    8   10   11    6     6056  6056  6060  6063  6064  6063
   4    1    6    5    4    5   35  …  6058  6061  6033  6059  6062  6061
   5    6    7   32    6    7    3     6060  6059  6064  6058  6060  6060
  24    5   26    1   33   34   51     6031  6032  6017  6037  6059  6039
  26   25   28   48    2    3   34     6062  6063  6034  6039  6038  6041
  41   27   43   33    1    2   36     6063  6064  6032  6023  6040  6025
 102   42  104   31   49   50  114  …  6015  6016  5954  5962  6024  5964
  20  103   22  111   34   35   52     6030  6033  6018  6045  5963  6047
  19   20   21   47   32   33   50     6032  6031  6016  6044  6045  6046
  42   21   44 

# CUDA Forces

## Define Variables for CUDA

In [10]:
@time X_Cu = zeros(size(X)[1],3) |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = zeros(14,size(X)[1]) |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.000085 seconds (12 allocations: 213.516 KiB)
  0.000080 seconds (12 allocations: 213.516 KiB)
  0.000141 seconds (12 allocations: 995.203 KiB)
  0.000287 seconds (12 allocations: 2.915 MiB)
  0.000201 seconds (12 allocations: 995.203 KiB)


  0.000715 seconds (12 allocations: 2.915 MiB)


## Adding Variables to CUDA

In [13]:
@time X_Cu = X |> cu
@time idx_Cu = idx |> cu;

  0.000075 seconds (8 allocations: 71.297 KiB)
  0.000089 seconds (6 allocations: 192 bytes)


## Generating Forces

In [49]:
# Finding Distances
println("Finding Distances")
@time r[:,:,1] =repeat(X_Cu[idx_Cu[1,:],1]', 14) - X_Cu[idx_Cu[:,:],1]
@time r[:,:,2] =repeat(X_Cu[idx_Cu[1,:],2]', 14) - X_Cu[idx_Cu[:,:],2]
@time r[:,:,3] =repeat(X_Cu[idx_Cu[1,:],3]', 14) - X_Cu[idx_Cu[:,:],3];

Finding Distances
  0.000208 seconds (282 allocations: 13.453 KiB)
  0.000230 seconds (282 allocations: 13.453 KiB)
  0.000295 seconds (282 allocations: 13.453 KiB)


In [50]:
# Finding Distances/Norm
println("Finding Distances/Norm")
@time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)
@time r[:,:,1] =r[:,:,1] ./ dist
@time r[:,:,2] =r[:,:,2] ./ dist
@time r[:,:,3] =r[:,:,3] ./ dist;

Finding Distances/Norm
  0.000172 seconds (176 allocations: 7.422 KiB)
  0.000101 seconds (54 allocations: 2.375 KiB)
  0.000111 seconds (54 allocations: 2.375 KiB)
  0.000078 seconds (54 allocations: 2.375 KiB)


In [51]:
# F = - K*(dist-r_max)*(dist-r_max)*(dist - s)
r_max, s = 4, 1
K = 0.1

# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = (-K.*(dist .- r_max).^2) .* (dist .- s) .* r;

Calculationg Forces
  0.000368 seconds (80 allocations: 7.578 KiB)


In [52]:
# Calculating de dX
println("Calculationg dX")
@time dX_Cu[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
@time dX_Cu[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
@time dX_Cu[:,3] = sum(F[:,:,3][2:end,:]; dims=1);

Calculationg dX
  0.000123 seconds (72 allocations: 3.125 KiB)
  0.000112 seconds (72 allocations: 3.125 KiB)
  0.000092 seconds (72 allocations: 3.125 KiB)


In [53]:
dX_Cu

6064×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.441006      0.62202      1.64062
 -2.77556f-17   0.574658     1.6072
 -0.358994      0.62202      1.64062
  0.599306      0.440277     1.60735
  0.0          -0.00311011   1.53889
  0.0          -0.00311011   1.53889
 -0.861352      0.105725     1.64096
  0.861867     -0.0157381    1.60433
  0.0          -0.00377605   1.5371
  0.0          -0.00377605   1.5371
  ⋮                         
  0.0           0.00377605  -1.5371
 -0.600177      0.326573    -1.60433
  0.599306     -0.440277    -1.60735
  0.0           0.00311011  -1.53889
  0.0           0.00311011  -1.53889
 -0.737615     -0.105725    -1.64096
  0.441006     -0.62202     -1.64062
 -2.77556f-17  -0.574658    -1.6072
 -0.358994     -0.62202     -1.64062