# Installing Dependencies

In [19]:
using DelimitedFiles
using NearestNeighbors: KDTree, knn
using LinearAlgebra: norm
using CUDA
using BenchmarkTools: @btime

# Using kNN

In [20]:
@time function NearNeighbor(X, t, t_knn, nn)
    # Using kNN for Nearest Neighbors
    if t%t_knn | t == 0
        kdtree = KDTree(X')
    end 

    # Loop over all cells to compute displacements
    index, _ = knn(kdtree, X[1,:], nn, true)
    for i in 2:size(X)[1]
        # Scan neighbours
        global idxs, _ = knn(kdtree, X[i,:], nn, true)
        index = hcat(index, idxs)

    end
    return index
end

function force(X, idxs, r_max, s, K )
    # Initialise displacement array
    global dX = zeros(Float64, size(X)[1], 3)

    for i in 1:size(X)[1]
        # Initialise variables
        global Xi = X[i,1:3]
        for j in idxs[:,i]
            if i != j
                global r = Xi - X[j,:]
                global dist = norm(r)
                # Calculate attraction/repulsion force differential here
                if dist < r_max
                    global F = - K*(dist-r_max)*(dist-r_max)*(dist - s)
                    dX[i,:] =  dX[i,:] + r/dist * F
                end 
            end
        end
    end
    return dX
end

  0.000123 seconds (33 allocations: 2.627 KiB)


force (generic function with 1 method)

# Initial Conditions

In [21]:
R_agg = 10
t, t_knn = 0, 10
r_max, s = 2.5, 2
K = 10

nn = 20

@time X = Float32.(readdlm("../../data/init/Sphere/$R_agg.xyz")[3:end,2:end]) |> cu
@time idx = NearNeighbor(Matrix(X), t, t_knn, nn)

  0.011175 seconds (12.02 k allocations: 406.750 KiB)
  0.148520 seconds (398.42 k allocations: 66.668 MiB, 15.17% gc time, 86.71% compilation time)


20×746 Matrix{Int64}:
  1   2   3   4   5   6   7   8   9  10  …  740  741  742  743  744  745  746
 23  24  25  35  35  36  38  41  42  44     710  710  712  713  722  723  724
 29  29  31  34  36  37  37  42  43  43     709  711  711  712  717  717  719
 28  30  30   9  10  11  11  12   4   6     736  737  738  739  716  718  718
  2   1   2   8   9  10   6   4  12  13     737  738  739  742  745  746  745
  4   3   6   5   4   5  31   9   5   5  …  741  740  743  719  740  744  742
  5   5   7  28   6   7   3  34  13  14     716  742  741  746  741  742  743
 22   6  26   1  29  30  45  48  10  11     744  717  718  706  721  741  725
 24  23  24  42   2   3  32  35   8   9     703  745  745  718  723  722  723
 35  25  37  29   1   2  30  33  35  36     717  744  746  720  710  724  712
 76  36  78  27  43  44  86  91  49  50  …  715  704  705  664  670  711  672
 18  77  21  83  28  31  46  40  34  35     661  718  719  707  727  671  728
 19  20  20  41  30  29  44  43  36  37   

# CUDA Forces

## Define Variables for CUDA

In [22]:
@time X_Cu = X |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = idx |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.000003 seconds
  0.000065 seconds (11 allocations: 26.688 KiB)
  0.000063 seconds (6 allocations: 192 bytes)
  0.000204 seconds (12 allocations: 367.656 KiB)
  0.000088 seconds (12 allocations: 122.828 KiB)
  0.000092 seconds (12 allocations: 367.656 KiB)


## Generating Forces

In [23]:
# Finding Distances
println("Finding Distances")
r = reshape(repeat(X, inner=(nn,1)), nn, size(X)[1], 3) - X[getindex.(idx,1),:]

Finding Distances


20×746×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
  0.0   0.0   0.0   0.0   0.0   0.0  …   0.0   0.0   0.0   0.0   0.0   0.0
  0.0   0.0   0.0  -1.0   1.0   1.0      1.0  -1.0  -1.0   0.0   0.0   0.0
 -1.0   1.0  -1.0   1.0  -1.0  -1.0     -1.0   1.0   1.0  -1.0   1.0  -1.0
  1.0  -1.0   1.0  -1.0  -1.0  -1.0      1.0   1.0   1.0   1.0  -1.0   1.0
 -2.0   2.0   2.0   1.0   1.0   1.0     -1.0  -1.0   2.0  -2.0  -2.0   2.0
  1.0  -2.0   1.0  -2.0   2.0   2.0  …   2.0  -2.0   0.0   1.0   2.0   1.0
 -1.0   1.0  -1.0   0.0  -2.0  -2.0     -2.0   2.0   1.0  -1.0  -1.0  -1.0
  2.0  -1.0  -2.0  -1.0   0.0   0.0      0.0   0.0   0.0   2.0   1.0  -2.0
 -2.0   2.0   2.0   0.0  -1.0  -1.0     -1.0   1.0   2.0  -2.0   2.0   2.0
  0.0  -2.0   0.0  -2.0   1.0   1.0      1.0  -1.0  -2.0   0.0  -2.0   0.0
  0.0   0.0   0.0   2.0   0.0   0.0  …   0.0   0.0   0.0   0.0   0.0   0.0
  1.0   0.0  -1.0   0.0   2.0  -2.0     -2.0  -2.0  -2.0  -1.0   0.0   1.0
 -1.0  -1.0   1.0   2.0  -2.0   2.0

In [92]:
X[idx[getindex.(rand(2:nn,size(X,1)) ,1),:][1,:], :]

746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.5  -4.04  -7.35
 -1.5  -4.04  -7.35
  0.5  -4.04  -7.35
 -2.5   1.15  -7.35
  0.5  -2.89  -8.98
  2.5  -2.89  -8.98
  5.5  -2.31  -7.35
 -1.5  -0.58  -7.35
 -3.5   0.58  -8.98
 -1.5   0.58  -8.98
  2.5  -0.58  -7.35
 -2.5   4.62  -7.35
 -2.5   2.31  -8.98
  ⋮           
  1.5  -1.15   7.35
 -1.5   0.58   7.35
  0.5  -0.58   8.98
  2.5  -0.58   8.98
  2.5   0.58   7.35
 -2.5  -1.15   7.35
  0.5   2.89   8.98
  0.5   2.89   8.98
  1.5   2.31   7.35
  0.5   4.04   7.35
 -1.5   4.04   7.35
  0.5   4.04   7.35

: 

In [52]:
@time r_p = X[idx[CartesianIndex.(rand(2:nn,size(X,1)),1:size(X,1)')],:] |> cu

  0.000186 seconds (68 allocations: 26.375 KiB)


746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -1.5   0.58  -8.98
 -1.5  -2.89  -8.98
  1.5  -4.62  -5.72
 -1.5  -2.89  -8.98
  2.5  -0.58  -7.35
  3.5  -2.31  -7.35
  1.5   1.15  -7.35
 -1.5   0.58  -8.98
 -1.5   0.58  -5.72
 -0.5  -1.15  -8.98
  0.5   0.58  -8.98
 -2.5  -1.15  -8.98
 -3.5   0.58  -8.98
  ⋮           
  2.5  -4.04   5.72
 -2.5  -2.31   8.98
 -2.5   1.15   8.98
 -1.5  -0.58   8.98
  0.5  -2.89   7.35
 -1.5  -0.58   8.98
  2.5  -0.58   8.98
  3.5  -1.15   7.35
  2.5  -0.58   8.98
 -0.5   2.31   7.35
  1.5   2.31   7.35
 -0.5   2.31   7.35

In [8]:
dist_p = (sum(r_p .^ 2, dims=2).^ 0.5)

746×1 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 3.4630911091852
 3.4630911091852
 2.0068882102377086
 3.4614734105575717
 1.9954444740042827
 3.4564285701267115
 2.819024380949584
 2.00062455915871
 2.0
 3.4600000769554526
 2.8288687892351927
 1.9983240496515502
 2.0
 ⋮
 3.463076651579395
 2.827189422468466
 3.2599998102012533
 2.827189422468466
 1.9982242449567402
 2.0
 1.9954444740042827
 2.00062455915871
 3.2599998102012533
 3.4680830855663065
 1.9948429137603378
 3.4631342062615205

In [9]:
r_p ./ dist_p

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 -0.577519  -0.667034  -0.470678
  0.577519  -0.667034  -0.470678
 -0.498284  -0.867014   0.0
  0.866683  -0.16467   -0.470898
  0.501141  -0.285651  -0.81686
  0.578632  -0.665427  -0.471585
  0.0       -0.815885  -0.578214
  0.0        0.579819  -0.814745
  1.0        0.0        0.0
  0.0       -1.0        0.0
  0.706996   0.410058  -0.576202
 -0.500419  -0.290243  -0.815683
 -1.0        0.0        0.0
  ⋮                    
  0.866282   0.499556   0.0
  0.0        0.817066   0.576544
  0.0        0.0        1.0
  0.0        0.817066   0.576544
 -0.500444  -0.865769   0.0
 -1.0        0.0        0.0
  0.501141   0.285651   0.81686
  0.0       -0.579819   0.814745
  0.0        0.0        1.0
 -0.865031   0.501718   0.0
  0.0       -0.576486   0.817107
  0.866267   0.167478   0.470672

In [14]:
# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r ./ dist
F[dist .>  r_max] .= 0

Calculationg Forces
  0.000222 seconds (106 allocations: 11.750 KiB)


LoadError: BoundsError: attempt to access 20×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer} at index [CartesianIndex{2}[CartesianIndex(8, 1), CartesianIndex(9, 1), CartesianIndex(10, 1), CartesianIndex(11, 1), CartesianIndex(12, 1), CartesianIndex(13, 1), CartesianIndex(14, 1), CartesianIndex(15, 1), CartesianIndex(16, 1), CartesianIndex(17, 1)  …  CartesianIndex(11, 746), CartesianIndex(12, 746), CartesianIndex(13, 746), CartesianIndex(14, 746), CartesianIndex(15, 746), CartesianIndex(16, 746), CartesianIndex(17, 746), CartesianIndex(18, 746), CartesianIndex(19, 746), CartesianIndex(20, 746)]]

In [11]:
sum(F[2:end,:,:]; dims=1)

1×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 3.54506  -1.77636e-15  -20.2583  11.764  …  20.2583  -1.77636e-15  -3.54506

[:, :, 2] =
 6.74017  14.2688  6.74017  19.5501  42.3152  …  -6.74017  -14.2688  -6.74017

[:, :, 3] =
 74.1541  46.9115  74.1541  46.9127  34.0647  …  -74.1541  -46.9115  -74.1541

In [15]:
dX = sum(F[2:end,:,:]; dims=1)[1,:,:]

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
   3.54506        6.74017    74.1541
  -1.77636e-15   14.2688     46.9115
 -20.2583         6.74017    74.1541
  11.764         19.5501     46.9127
 -11.7557        42.3152     34.0647
 -11.7557        42.3152     34.0647
 -23.6635        12.6511     46.9127
  31.8635        14.2121     74.3932
  31.3488        26.9379     27.9151
  11.7557        24.6648     21.5277
 -27.4525        11.1449     40.7266
  27.4502        -2.20572    40.791
 -11.7574        -6.65113    27.9789
   ⋮                       
 -34.7473        10.4639   -116.167
  31.8635       -14.2121    -74.3932
  15.674        -26.9379    -27.9151
 -11.7557       -24.6648    -21.5277
 -19.5703       -11.1449    -40.7266
  11.764        -19.5501    -46.9127
  11.7557       -42.3152    -34.0647
 -11.7557       -42.3152    -34.0647
 -23.6635       -12.6511    -46.9127
  20.2583        -6.74017   -74.1541
  -1.77636e-15  -14.2688    -46.9115
  -3.54506       -6.74017   -74.1541

In [18]:
fp = 0.001
dX = dX - fp .* (r_p ./ dist_p)

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
   3.54621        6.7415     74.155
  -0.00115504    14.2702     46.9125
 -20.2573         6.7419     74.1541
  11.7623        19.5505     46.9137
 -11.7567        42.3158     34.0664
 -11.7568        42.3165     34.0657
 -23.6635        12.6527     46.9139
  31.8635        14.2109     74.3948
  31.3468        26.9379     27.9151
  11.7557        24.6668     21.5277
 -27.4539        11.1441     40.7278
  27.4512        -2.20514    40.7927
 -11.7554        -6.65113    27.9789
   ⋮                       
 -34.749         10.4629   -116.167
  31.8635       -14.2137    -74.3943
  15.674        -26.9379    -27.9171
 -11.7557       -24.6664    -21.5288
 -19.5693       -11.1432    -40.7266
  11.766        -19.5501    -46.9127
  11.7547       -42.3158    -34.0664
 -11.7557       -42.314     -34.0664
 -23.6635       -12.6511    -46.9147
  20.26          -6.74117   -74.1541
  -1.77636e-15  -14.2677    -46.9132
  -3.54679       -6.7405    -74.155

In [None]:
# @time force(Matrix(X), Matrix(idx), r_max, s, K )

  0.054837 seconds (226.18 k allocations: 10.674 MiB, 9.36% gc time, 78.12% compilation time)


746×3 Matrix{Float64}:
  0.0          0.0341817    -0.0176344
  0.0          0.0341817    -0.0176344
  0.0          0.0341817    -0.0176344
  0.00834585  -0.029792     -0.0176773
  0.0         -0.0443138    -0.0176773
  0.0         -0.0443138    -0.0176773
 -0.00610839  -0.0259212    -0.0176773
 -0.00447492  -0.00752866   -0.0176773
  0.0         -0.00752866   -0.0176773
  0.0         -0.00752866   -0.0176773
  0.0         -0.00752866   -0.0176773
 -0.00223746   0.000519076  -0.00561201
  0.0         -0.00335173   -0.00561201
  ⋮                         
  0.00223746  -0.000519076   0.00561201
 -0.00447492   0.00752866    0.0176773
  0.0          0.00752866    0.0176773
  0.0          0.00752866    0.0176773
  0.0          0.00752866    0.0176773
  0.00834585   0.029792      0.0176773
  0.0          0.0443138     0.0176773
  0.0          0.0443138     0.0176773
 -0.00610839   0.0259212     0.0176773
  0.0         -0.0341817     0.0176344
  0.0         -0.0341817     0.0176344
  0.0    