# Installing Dependencies

In [1]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using LinearAlgebra: norm
using CUDA
using BenchmarkTools: @btime

# Using kNN

In [2]:
@time function NearNeighbor(X, t, t_knn)
    # Using kNN for Nearest Neighbors
    if t%t_knn | t == 0
        kdtree = KDTree(X')
    end 

    # Loop over all cells to compute displacements
    index, _ = knn(kdtree, X[1,:], 14, true)
    for i in 2:size(X)[1]
        # Scan neighbours
        global idxs, _ = knn(kdtree, X[i,:], 14, true)
        index = hcat(index, idxs)

    end
    return index
end

  0.001363 seconds (996 allocations: 64.106 KiB, 94.16% compilation time)


NearNeighbor (generic function with 1 method)

# Initial Conditions

In [4]:
R_Agg = 16
t, t_knn = 0, 10
r_max, s = 2, 1
K = 10

@time X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float64, header=true)[1][:, 1:3]
@time idx = NearNeighbor(X, t, t_knn)

  0.022536 seconds (565.10 k allocations: 15.185 MiB, 36.35% gc time)


  0.197684 seconds (36.27 k allocations: 1.922 GiB, 9.10% gc time)


14×6064 Matrix{Int64}:
   1    2    3    4    5    6    7  …  6059  6060  6061  6062  6063  6064
  25   26   27   40   42   43   44     6024  6025  6026  6038  6039  6040
  33   33   35   41   41   42   43     6023  6024  6025  6030  6031  6033
  32   34   34    9    9   10   11     6055  6057  6057  6031  6032  6032
   2    3    2    8   10   11    6     6056  6056  6060  6063  6064  6063
   4    1    6    5    4    5   35  …  6058  6061  6033  6059  6062  6061
   5    6    7   32    6    7    3     6060  6059  6064  6058  6060  6060
  24    5   26    1   33   34   51     6031  6032  6017  6037  6059  6039
  26   25   28   48    2    3   34     6062  6063  6034  6039  6038  6041
  41   27   43   33    1    2   36     6063  6064  6032  6023  6040  6025
 102   42  104   31   49   50  114  …  6015  6016  5954  5962  6024  5964
  20  103   22  111   34   35   52     6030  6033  6018  6045  5963  6047
  19   20   21   47   32   33   50     6032  6031  6016  6044  6045  6046
  42   21   44 

# CUDA Forces

## Define Variables for CUDA

In [6]:
@time X_Cu = X |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = idx |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.000068 seconds (8 allocations: 71.297 KiB)
  0.000083 seconds (12 allocations: 213.516 KiB)
  0.000099 seconds (6 allocations: 192 bytes)
  0.000526 seconds (12 allocations: 2.915 MiB)
  0.000133 seconds (12 allocations: 995.203 KiB)
  0.000605 seconds (12 allocations: 2.915 MiB)


## Generating Forces

In [14]:
# Finding Distances
println("Finding Distances")
CUDA.@time r[:,:,1] =repeat(X_Cu[idx_Cu[1,:],1]', 14) - X_Cu[idx_Cu[:,:],1]
CUDA.@time r[:,:,2] =repeat(X_Cu[idx_Cu[1,:],2]', 14) - X_Cu[idx_Cu[:,:],2]
CUDA.@time r[:,:,3] =repeat(X_Cu[idx_Cu[1,:],3]', 14) - X_Cu[idx_Cu[:,:],3];

Finding Distances


  0.000221 seconds (284 CPU allocations: 13.484 KiB) (6 GPU allocations: 1.689 MiB, 8.86% memmgmt time)
  0.000222 seconds (283 CPU allocations: 13.516 KiB) (6 GPU allocations: 1.689 MiB, 9.30% memmgmt time)


  0.000230 seconds (285 CPU allocations: 13.547 KiB) (6 GPU allocations: 1.689 MiB, 8.77% memmgmt time)


In [16]:
# Finding Distances/Norm
println("Finding Distances/Norm")
CUDA.@time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)

Finding Distances/Norm


  0.000243 seconds (219 CPU allocations: 10.719 KiB) (9 GPU allocations: 3.239 MiB, 8.82% memmgmt time)


14×6064 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 0.0      0.0      0.0      0.0      …  0.0      0.0      0.0      0.0
 1.99484  1.99484  1.99484  1.99544     1.99544  1.99484  1.99484  1.99484
 1.99832  1.99832  1.99832  1.99544     1.99544  1.99832  1.99832  1.99832
 1.99832  1.99832  1.99832  1.99822     1.99822  1.99832  1.99832  1.99832
 2.0      2.0      2.0      1.99822     2.0      2.0      2.0      2.0
 2.00689  2.0      2.00689  2.0      …  2.00063  2.00689  2.0      2.00689
 2.00689  2.00689  2.00689  2.00063     2.00689  2.00689  2.00689  2.00689
 2.82478  2.00689  2.82478  2.00689     2.81902  2.82478  2.00689  2.82478
 2.82478  2.82478  2.82478  2.81902     2.82887  2.82478  2.82478  2.82478
 2.82719  2.82478  2.82719  2.82887     2.82887  2.82719  2.82478  2.82719
 3.26     2.82719  3.26     2.82887  …  3.26     3.26     2.82719  3.26
 3.45707  3.26     3.45707  3.26        3.45643  3.45707  3.26     3.45707
 3.45707  3.45707  3.45707  3.45643     3.45643  3.45707  3

In [20]:
@time r[:,:,1][dist .< 2] = r[:,:,1] ./ dist
@time r[:,:,2][dist .< 2] = r[:,:,1] ./ dist
@time r[:,:,3][dist .< 2] = r[:,:,1] ./ dist;

  0.000335 seconds (276 allocations: 12.609 KiB)
  0.000240 seconds (276 allocations: 12.609 KiB)
  0.000337 seconds (276 allocations: 12.609 KiB)


In [23]:
# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = (-K.*(dist .- r_max).^2) .* (dist .- s) .* r
CUDA.@time A = F[:,:,1]; A[dist .> r_max] .= 0; F[:,:,1]=A
CUDA.@time A = F[:,:,2]; A[dist .> r_max] .= 0; F[:,:,2]=A
CUDA.@time A = F[:,:,3]; A[dist .> r_max] .= 0; F[:,:,3]=A;

Calculationg Forces
  0.000148 seconds (79 allocations: 7.562 KiB)


  0.000133 seconds (64 CPU allocations: 4.281 KiB) (1 GPU allocation: 663.250 KiB, 7.59% memmgmt time)
  0.000158 seconds (23 CPU allocations: 992 bytes) (1 GPU allocation: 663.250 KiB, 6.07% memmgmt time)


  0.000123 seconds (65 CPU allocations: 5.297 KiB) (1 GPU allocation: 663.250 KiB, 7.23% memmgmt time)


In [25]:
# Calculating de dX
println("Calculationg dX")
@time dX_Cu[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
@time dX_Cu[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
@time dX_Cu[:,3] = sum(F[:,:,3][2:end,:]; dims=1);

Calculationg dX
  0.000114 seconds (72 allocations: 3.125 KiB)
  0.000084 seconds (72 allocations: 3.125 KiB)
  0.000088 seconds (72 allocations: 3.125 KiB)


In [131]:
X_Cu = X_Cu + dX_Cu

192×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -6.64974  -1.14791   -4.0755
 -4.64997  -1.14772   -4.07502
 -2.65041  -1.14818   -4.07549
 -7.64925   0.581709  -4.07551
 -5.65001   0.582022  -4.07504
 -3.65054   0.581354  -4.07559
 -6.65001   2.30925   -4.07984
 -4.65002   2.30929   -4.07984
 -5.64996  -4.03623   -2.44451
 -3.65032  -4.0362    -2.44524
  ⋮                   
  6.64968   4.0362     2.44524
  3.64999  -2.30925    4.07984
  5.64998  -2.30929    4.07984
  2.65075  -0.581709   4.07551
  4.64999  -0.582022   4.07504
  6.64946  -0.581354   4.07559
  3.65026   1.14791    4.0755
  5.65003   1.14772    4.07502
  7.64959   1.14818    4.07549

# Joining the Function

In [74]:
function main()
    global X_Cu
    global dX_Cu
    global idx_Cu
    global r 
    global dist

    # Finding Distances
    println("Finding Distances")
    @time r[:,:,1] =repeat(X_Cu[idx_Cu[1,:],1]', 14) - X_Cu[idx_Cu[:,:],1]
    @time r[:,:,2] =repeat(X_Cu[idx_Cu[1,:],2]', 14) - X_Cu[idx_Cu[:,:],2]
    @time r[:,:,3] =repeat(X_Cu[idx_Cu[1,:],3]', 14) - X_Cu[idx_Cu[:,:],3]

    # Finding Distances/Norm
    println("Finding Distances/Norm")
    @time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)

    @time r[:,:,1][dist .< 2] = r[:,:,1] ./ dist
    @time r[:,:,2][dist .< 2] = r[:,:,1] ./ dist
    @time r[:,:,3][dist .< 2] = r[:,:,1] ./ dist

    # dX[i,:] +=  r/dist * F
    println("Calculationg Forces")
    @time F = (-K.*(dist .- r_max).^2) .* (dist .- s) .* r
    @time A = F[:,:,1]; A[dist .> r_max] .= 0; F[:,:,1]=A
    @time A = F[:,:,2]; A[dist .> r_max] .= 0; F[:,:,2]=A
    @time A = F[:,:,3]; A[dist .> r_max] .= 0; F[:,:,3]=A

    # Calculating de dX
    println("Calculationg dX")
    @time dX_Cu[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
    @time dX_Cu[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
    @time dX_Cu[:,3] = sum(F[:,:,3][2:end,:]; dims=1)

    X_Cu = X_Cu + dX_Cu
end

main (generic function with 3 methods)

In [138]:
main()

Finding Distances
  0.000305 seconds (282 allocations: 13.453 KiB)
  0.000138 seconds (282 allocations: 13.453 KiB)
  0.000131 seconds (282 allocations: 13.453 KiB)
Finding Distances/Norm
  0.000160 seconds (176 allocations: 7.422 KiB)
  0.000366 seconds (276 allocations: 12.609 KiB)
  0.000176 seconds (276 allocations: 12.609 KiB)
  0.000164 seconds (276 allocations: 12.609 KiB)
Calculationg Forces
  0.000076 seconds (79 allocations: 7.562 KiB)
  0.000019 seconds (22 allocations: 928 bytes)
  0.000013 seconds (22 allocations: 928 bytes)
  0.000012 seconds (22 allocations: 928 bytes)
Calculationg dX
  0.000067 seconds (72 allocations: 3.125 KiB)
  0.000038 seconds (72 allocations: 3.125 KiB)
  0.000036 seconds (72 allocations: 3.125 KiB)


6064×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -23.3632   -1.06139   -18.552
 -16.67     -1.13056   -21.1259
 -20.7863    2.32746   -15.1411
 -16.0632    0.400355  -11.6387
 -11.8024   -3.11442   -12.8912
 -18.6323    0.596963  -13.6524
  -7.99244  -4.03955   -15.365
 -16.3057   -5.064     -15.2417
 -19.9602    2.64833   -14.7279
 -14.7626    2.09645   -15.6331
   ⋮                   
  19.7717   -1.5908     15.83
  15.3854   -2.41841    13.141
  20.7373    2.47712    12.4519
  15.8482    1.09797    16.0622
  15.1888    1.31382    10.303
  24.0298   -1.89541    12.2079
  10.0843    1.3376     17.7336
  17.1197    0.943071   21.0474
  17.8502   -1.73009    14.387