# Installing Dependencies

In [2]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using LinearAlgebra: norm
using CUDA
using BenchmarkTools: @btime

# Using kNN

In [3]:
@time function NearNeighbor(X, t, t_knn)
    # Using kNN for Nearest Neighbors
    if t%t_knn | t == 0
        global kdtree = KDTree(X')
    end 

    # Loop over all cells to compute displacements
    index, _ = knn(kdtree, X[1,:], 14, true)
    for i in 2:size(X)[1]
        # Scan neighbours
        global idxs, _ = knn(kdtree, X[i,:], 14, true)
        index = hcat(index, idxs)

    end
    return index
end

  0.002791 seconds (998 allocations: 64.155 KiB, 85.92% compilation time)


NearNeighbor (generic function with 1 method)

# Initial Conditions

In [9]:
R_Agg = 14
t, t_knn = 0, 10
r_max, s = 2, 1
K = 10

@time X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float64, header=true)[1][:, 1:3]
@time idx = NearNeighbor(X, t, t_knn)

  0.010229 seconds (374.14 k allocations: 10.079 MiB)


  0.102025 seconds (24.29 k allocations: 886.361 MiB, 11.72% gc time)


14×4068 Matrix{Int64}:
  1   2   3    4    5    6    7    8  …  4063  4064  4065  4066  4067  4068
 20  20  26    8    8   11   12    5     4057  4061  4061  4042  4048  4049
 19  21  27    7    9   12    4    4     4058  4060  4062  4041  4049  4050
  3   3   2    5    4    7   13   14     4062  4065  4064  4068  4066  4066
  2   1   1   54   54   52    6   13     4017  4014  4015  4067  4068  4067
 26  27  33   53   55   60    8    9  …  4009  4015  4016  4035  4041  4042
 13  14  20   47   48   59   53    7     4010  4020  4021  4049  4055  4056
 27  26  34   61   62   51   60   54     4018  4007  4008  4034  4040  4041
 25  28  32   48   49   53   61   62     4016  4019  4020  4036  4042  4043
 78  79  88   46   47   68   52   61     4001  4021  4022  3981  3989  3990
 21  22  21  135  136  143   54   55  …  3925  3932  3933  4050  4050  4048
 18  19  19   13   14    4   69   53     4065  4055  4056  4048  4047  4051
 14  13  28    9   10   13  144   70     4056  4059  4060  4040  

# CUDA Forces

## Define Variables for CUDA

In [11]:
@time X_Cu = zeros(size(X)[1],3) |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = zeros(14,size(X)[1]) |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.000078 seconds (12 allocations: 143.391 KiB)
  0.000083 seconds (12 allocations: 143.391 KiB)
  0.000107 seconds (12 allocations: 667.766 KiB)
  0.000233 seconds (12 allocations: 1.956 MiB)
  0.000130 seconds (12 allocations: 667.766 KiB)


  0.000422 seconds (12 allocations: 1.956 MiB)


## Adding Variables to CUDA

In [13]:
@time X_Cu = X |> cu
@time idx_Cu = idx |> cu;

  0.000080 seconds (8 allocations: 47.922 KiB)
  0.000074 seconds (6 allocations: 192 bytes)


## Generating Forces

In [45]:
# Finding Distances
println("Finding Distances")
@time r[:,:,1] =repeat(X_Cu[idx_Cu[1,:],1]', 14) - X_Cu[idx_Cu[:,:],1]
@time r[:,:,2] =repeat(X_Cu[idx_Cu[1,:],2]', 14) - X_Cu[idx_Cu[:,:],2]
@time r[:,:,3] =repeat(X_Cu[idx_Cu[1,:],3]', 14) - X_Cu[idx_Cu[:,:],3];

Finding Distances
  0.000238 seconds (282 allocations: 13.453 KiB)
  0.000191 seconds (282 allocations: 13.453 KiB)
  0.000222 seconds (282 allocations: 13.453 KiB)


In [46]:
# Finding Distances/Norm
println("Finding Distances/Norm")
@time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)

Finding Distances/Norm
  0.000176 seconds (176 allocations: 7.422 KiB)


14×4068 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 0.0      0.0      0.0      0.0      …  0.0      0.0      0.0      0.0
 1.99215  1.99215  1.99278  1.99803     1.99803  1.99278  1.99285  1.99215
 1.99285  1.99285  1.99278  1.9981      1.9981   1.99278  1.99215  1.99285
 1.99788  1.99788  1.99788  2.0         2.0      1.99788  1.99788  1.99788
 1.99981  1.99981  1.99788  2.00771     2.00771  1.99788  1.99981  1.99981
 1.99897  1.99897  1.99971  2.00771  …  2.00771  1.99971  1.99897  1.99897
 2.82599  2.82599  2.81563  2.01053     2.01053  2.81563  2.82599  2.82599
 2.82725  2.82725  2.82823  2.83406     2.83406  2.82824  2.82814  2.82725
 2.82814  2.82814  2.82824  2.83588     2.83588  2.82823  2.82725  2.82814
 3.26987  3.26987  3.2699   2.83585     2.83589  3.2699   3.26987  3.26987
 3.45957  3.46063  3.45403  3.26994  …  3.26994  3.45403  3.45957  3.45957
 3.46063  3.45957  3.45403  3.45954     3.45954  3.45403  3.46063  3.46063
 3.46206  3.46206  3.46054  3.46297     3.46297  3.4

In [47]:
@time r[:,:,1][dist .< 2] = r[:,:,1] ./ dist
@time r[:,:,2][dist .< 2] = r[:,:,1] ./ dist
@time r[:,:,3][dist .< 2] = r[:,:,1] ./ dist;

  0.000348 seconds (276 allocations: 12.609 KiB)
  0.000294 seconds (276 allocations: 12.609 KiB)
  0.000285 seconds (276 allocations: 12.609 KiB)


In [48]:
# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = (-K.*(dist .- r_max).^2) .* (dist .- s) .* r;
@time A = F[:,:,1]; A[dist .> r_max] .= 0; F[:,:,1]=A
@time A = F[:,:,2]; A[dist .> r_max] .= 0; F[:,:,2]=A
@time A = F[:,:,3]; A[dist .> r_max] .= 0; F[:,:,3]=A
F

Calculationg Forces
  0.000166 seconds (79 allocations: 7.562 KiB)
  0.000068 seconds (22 allocations: 928 bytes)
  0.000065 seconds (22 allocations: 928 bytes)


  0.000044 seconds (22 allocations: 928 bytes)


14×4068×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
  0.0           0.0          0.0          …   0.0          0.0
  0.00061098   -0.00061098  -0.000517319     -0.00050784  -0.00061098
 -0.000507773   0.00050784   0.000517319      0.00061098   0.00050784
  4.49634e-5   -4.49634e-5   4.49634e-5       4.49634e-5  -4.49634e-5
  7.27457e-7   -7.27457e-7  -4.49634e-5       7.27457e-7  -7.27457e-7
  4.67396e-9   -4.6745e-9   -0.0          …   4.6745e-9   -4.6745e-9
  0.0           0.0          0.0              0.0          0.0
  0.0           0.0          0.0              0.0          0.0
  0.0           0.0          0.0              0.0          0.0
  0.0           0.0          0.0              0.0          0.0
  0.0           0.0          0.0          …   0.0          0.0
  0.0           0.0          0.0              0.0          0.0
  0.0           0.0          0.0              0.0          0.0
  0.0           0.0          0.0              0.0          0.0

[:, :, 2] =
  0.0

In [49]:
# Calculating de dX
println("Calculationg dX")
@time dX_Cu[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
@time dX_Cu[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
@time dX_Cu[:,3] = sum(F[:,:,3][2:end,:]; dims=1);

Calculationg dX
  0.000135 seconds (72 allocations: 3.125 KiB)
  0.000136 seconds (72 allocations: 3.125 KiB)
  0.000111 seconds (72 allocations: 3.125 KiB)


In [50]:
dX_Cu

4068×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.000148902  -0.000546513   0.00183768
 -0.000148835  -0.00054655    0.0018378
  0.0          -0.000743497   0.0016861
  2.86117f-6    0.000129488   0.0
 -2.86117f-6    0.000129488   0.0
  3.03467f-6    0.000160532   0.0
  3.88127f-5    8.8118f-5     0.0
  1.87824f-8    5.60458f-7    0.0
 -3.8812f-5     8.80493f-5    0.0
 -3.0548f-6     0.000160497   0.0
  ⋮                          
  3.8812f-5    -8.80493f-5    0.0
  0.0          -5.93011f-7    0.0
 -3.87925f-5   -8.80832f-5    0.0
 -3.03467f-6   -0.000160532   0.0
  2.86117f-6   -0.000129488   0.0
 -2.86117f-6   -0.000129488   0.0
  0.0           0.000743497  -0.0016861
  0.000148835   0.00054655   -0.0018378
 -0.000148835   0.00054655   -0.0018378

In [51]:
X_Cu = X_Cu + dX_Cu

4068×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -15.9198  -0.581031  -13.8764
 -13.9202  -0.581032  -13.8764
 -14.92     1.14843   -13.8766
 -15.92    -6.34964   -12.25
 -13.92    -6.34964   -12.25
 -18.92    -4.6196    -12.25
 -16.9199  -4.61979   -12.25
 -14.92    -4.62      -12.25
 -12.9201  -4.61979   -12.25
 -10.92    -4.6196    -12.25
   ⋮                  
  11.9201   4.61979    12.25
  13.92     4.62       12.25
  15.9199   4.61979    12.25
  17.92     4.6196     12.25
  12.92     6.34964    12.25
  14.92     6.34964    12.25
  13.92    -1.14843    13.8766
  12.9202   0.581032   13.8764
  14.9198   0.581032   13.8764