# Installing Dependencies

In [1]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using LinearAlgebra: norm
using CUDA
using BenchmarkTools: @btime

# Using kNN

In [2]:
@time function NearNeighbor(X, t, t_knn, nn)
    # Using kNN for Nearest Neighbors
    if t%t_knn | t == 0
        kdtree = KDTree(X')
    end 

    # Loop over all cells to compute displacements
    index, _ = knn(kdtree, X[1,:], nn, true)
    for i in 2:size(X)[1]
        # Scan neighbours
        global idxs, _ = knn(kdtree, X[i,:], nn, true)
        index = hcat(index, idxs)

    end
    return index
end

function force(X, idxs, r_max, s, K )
    # Initialise displacement array
    global dX = zeros(Float64, size(X)[1], 3)

    for i in 1:size(X)[1]
        # Initialise variables
        global Xi = X[i,1:3]
        for j in idxs[:,i]
            if i != j
                global r = Xi - X[j,:]
                global dist = norm(r)
                # Calculate attraction/repulsion force differential here
                if dist < r_max
                    global F = - K*(dist-r_max)*(dist-r_max)*(dist - s)
                    dX[i,:] =  dX[i,:] + r/dist * F
                end 
            end
        end
    end
    return dX
end

  0.002782 seconds (1.02 k allocations: 75.525 KiB, 48.19% compilation time)


force (generic function with 1 method)

# Initial Conditions

In [4]:
R_agg = 10
t, t_knn = 0, 10
r_max, s = 2.5, 2
K = 10

nn = 20

@time X = Float32.(readdlm("../../data/init/Sphere/$R_agg.xyz")[3:end,2:end]) |> cu
@time idx = NearNeighbor(Matrix(X), t, t_knn, nn)

  0.573012 seconds (1.08 M allocations: 64.187 MiB, 2.98% gc time)
  0.844888 seconds (3.15 M allocations: 228.944 MiB, 6.01% gc time, 96.39% compilation time)


20×746 Matrix{Int64}:
  1   2   3   4   5   6   7   8   9  10  …  740  741  742  743  744  745  746
 23  24  25  35  35  36  38  41  42  44     710  710  712  713  722  723  724
 29  29  31  34  36  37  37  42  43  43     709  711  711  712  717  717  719
 28  30  30   9  10  11  11  12   4   6     736  737  738  739  716  718  718
  2   1   2   8   9  10   6   4  12  13     737  738  739  742  745  746  745
  4   3   6   5   4   5  31   9   5   5  …  741  740  743  719  740  744  742
  5   5   7  28   6   7   3  34  13  14     716  742  741  746  741  742  743
 22   6  26   1  29  30  45  48  10  11     744  717  718  706  721  741  725
 24  23  24  42   2   3  32  35   8   9     703  745  745  718  723  722  723
 35  25  37  29   1   2  30  33  35  36     717  744  746  720  710  724  712
 76  36  78  27  43  44  86  91  49  50  …  715  704  705  664  670  711  672
 18  77  21  83  28  31  46  40  34  35     661  718  719  707  727  671  728
 19  20  20  41  30  29  44  43  36  37   

# CUDA Forces

## Define Variables for CUDA

In [5]:
@time X_Cu = X |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = idx |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.010407 seconds (19.10 k allocations: 1.251 MiB, 99.78% compilation time)
  0.056964 seconds (141.84 k allocations: 8.335 MiB, 99.81% compilation time)
  0.087771 seconds (247.18 k allocations: 13.966 MiB, 9.58% gc time, 99.91% compilation time)
  0.112867 seconds (372.94 k allocations: 21.730 MiB, 99.88% compilation time)
  0.000052 seconds (12 allocations: 122.828 KiB)
  0.000092 seconds (12 allocations: 367.656 KiB)


## Generating Forces

In [8]:
# Finding Distances
println("Finding Distances")
r = reshape(repeat(X, inner=(nn,1)), nn, size(X)[1], 3) - X[getindex.(idx,1),:];

Finding Distances


In [48]:
# Finding Distances/Norm
println("Finding Distances/Norm")
CUDA.@time dist = ((sum(r .^ 2, dims=3)) .^ 0.5)[:,:,1]
# dist = reshape(repeat((dist), outer=(1,3)) ,nn ,size(X)[1], 3)


Finding Distances/Norm
  0.006356 seconds (286 CPU allocations: 17.578 KiB) (4 GPU allocations: 466.250 KiB, 0.34% memmgmt time)


20×746 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 0.0      0.0      0.0      0.0      …  0.0      0.0      0.0      0.0
 1.99484  1.99484  1.99484  1.99544     1.99544  1.99484  1.99484  1.99484
 1.99832  1.99832  1.99832  1.99544     1.99544  1.99832  1.99832  1.99832
 1.99832  1.99832  1.99832  1.99822     1.99822  1.99832  1.99832  1.99832
 2.0      2.0      2.0      1.99822     2.0      2.0      2.0      2.0
 2.00689  2.0      2.00689  2.0      …  2.00062  2.00689  2.0      2.00689
 2.00689  2.00689  2.00689  2.00062     2.00689  2.00689  2.00689  2.00689
 2.82478  2.00689  2.82478  2.00689     2.81902  2.82478  2.00689  2.82478
 2.82478  2.82478  2.82478  2.81902     2.82887  2.82478  2.82478  2.82478
 2.82719  2.82478  2.82719  2.82887     2.82887  2.82719  2.82478  2.82719
 3.26     2.82719  3.26     2.82887  …  3.26     3.26     2.82719  3.26
 3.45706  3.26     3.45706  3.26        3.45643  3.45706  3.26     3.45706
 3.45706  3.45706  3.45706  3.45643     3.45643  3.45706  3.

In [43]:
@time r = r ./ dist

  0.000092 seconds (34 allocations: 2.359 KiB)


20×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 NaN         NaN         NaN         …  NaN         NaN         NaN
   0.0         0.0         0.0            0.0         0.0         0.0
  -0.500419    0.500419   -0.500419      -0.500419    0.500419   -0.500419
   0.500419   -0.500419    0.500419       0.500419   -0.500419    0.500419
  -1.0         1.0         1.0           -1.0        -1.0         1.0
   0.498284   -1.0         0.498284  …    0.498284    1.0         0.498284
  -0.498284    0.498284   -0.498284      -0.498284   -0.498284   -0.498284
   0.708019   -0.498284   -0.708019       0.708019    0.498284   -0.708019
  -0.708019    0.708019    0.708019      -0.708019    0.708019    0.708019
   0.0        -0.708019    0.0            0.0        -0.708019    0.0
   0.0         0.0         0.0       …    0.0         0.0         0.0
   0.289263    0.0        -0.289263      -0.289263    0.0         0.289263
  -0.289263   -0.289263    0.289263       0.289263    0.289263

In [38]:
r ./ (sum(r .^ 2, dims=3)) .^ 0.5

20×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 NaN         NaN         NaN         …  NaN         NaN         NaN
   0.0         0.0         0.0            0.0         0.0         0.0
  -0.500419    0.500419   -0.500419      -0.500419    0.500419   -0.500419
   0.500419   -0.500419    0.500419       0.500419   -0.500419    0.500419
  -1.0         1.0         1.0           -1.0        -1.0         1.0
   0.498284   -1.0         0.498284  …    0.498284    1.0         0.498284
  -0.498284    0.498284   -0.498284      -0.498284   -0.498284   -0.498284
   0.708019   -0.498284   -0.708019       0.708019    0.498284   -0.708019
  -0.708019    0.708019    0.708019      -0.708019    0.708019    0.708019
   0.0        -0.708019    0.0            0.0        -0.708019    0.0
   0.0         0.0         0.0       …    0.0         0.0         0.0
   0.289263    0.0        -0.289263      -0.289263    0.0         0.289263
  -0.289263   -0.289263    0.289263       0.289263    0.289263

In [44]:
# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r
F[dist .>  r_max] .= 0
F

Calculationg Forces
  0.000137 seconds (92 allocations: 9.438 KiB)


20×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 NaN           NaN           NaN           …  NaN           NaN
   0.0           0.0           0.0              0.0           0.0
  -0.00211077    0.00211077   -0.00211077       0.00211077   -0.00211077
   0.00211077   -0.00211077    0.00211077      -0.00211077    0.00211077
   0.0          -0.0          -0.0              0.0          -0.0
  -0.00834592    0.0          -0.00834592  …   -0.0          -0.00834592
   0.00834592   -0.00834592    0.00834592       0.00834592    0.00834592
   0.0           0.00834592    0.0             -0.00834592    0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0         …    0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0

In [45]:
sum(F[2:end,:,:]; dims=1)

1×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 0.0  0.0  0.0  0.00834592  0.0  0.0  …  0.0  -0.00610844  0.0  0.0  0.0

[:, :, 2] =
 0.0341819  0.0341819  0.0341819  …  -0.0341819  -0.0341819  -0.0341819

[:, :, 3] =
 -0.0176343  -0.0176343  -0.0176343  …  0.0176343  0.0176343  0.0176343

In [46]:
sum(F[2:end,:,:]; dims=1)[1,:,:]

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  0.0          0.0341819    -0.0176343
  0.0          0.0341819    -0.0176343
  0.0          0.0341819    -0.0176343
  0.00834592  -0.0297922    -0.0176778
  0.0         -0.0443141    -0.0176778
  0.0         -0.0443141    -0.0176778
 -0.00610844  -0.0259213    -0.0176778
 -0.00447495  -0.00752861   -0.0176778
  0.0         -0.00752861   -0.0176778
  0.0         -0.00752861   -0.0176778
  0.0         -0.00752861   -0.0176778
 -0.00223747   0.000519265  -0.00561216
  0.0         -0.00335157   -0.00561216
  ⋮                         
  0.00223747  -0.000519265   0.00561216
 -0.00447495   0.00752861    0.0176778
  0.0          0.00752861    0.0176778
  0.0          0.00752861    0.0176778
  0.0          0.00752861    0.0176778
  0.00834592   0.0297922     0.0176778
  0.0          0.0443141     0.0176778
  0.0          0.0443141     0.0176778
 -0.00610844   0.0259213     0.0176778
  0.0         -0.0341819     0.0176343
  0.0         -0.0341

In [47]:
@time force(Matrix(X), Matrix(idx), r_max, s, K )

  0.054837 seconds (226.18 k allocations: 10.674 MiB, 9.36% gc time, 78.12% compilation time)


746×3 Matrix{Float64}:
  0.0          0.0341817    -0.0176344
  0.0          0.0341817    -0.0176344
  0.0          0.0341817    -0.0176344
  0.00834585  -0.029792     -0.0176773
  0.0         -0.0443138    -0.0176773
  0.0         -0.0443138    -0.0176773
 -0.00610839  -0.0259212    -0.0176773
 -0.00447492  -0.00752866   -0.0176773
  0.0         -0.00752866   -0.0176773
  0.0         -0.00752866   -0.0176773
  0.0         -0.00752866   -0.0176773
 -0.00223746   0.000519076  -0.00561201
  0.0         -0.00335173   -0.00561201
  ⋮                         
  0.00223746  -0.000519076   0.00561201
 -0.00447492   0.00752866    0.0176773
  0.0          0.00752866    0.0176773
  0.0          0.00752866    0.0176773
  0.0          0.00752866    0.0176773
  0.00834585   0.029792      0.0176773
  0.0          0.0443138     0.0176773
  0.0          0.0443138     0.0176773
 -0.00610839   0.0259212     0.0176773
  0.0         -0.0341817     0.0176344
  0.0         -0.0341817     0.0176344
  0.0    