# Installing Dependencies

In [1]:
using DelimitedFiles
using CUDA
using BenchmarkTools: @btime
include("../../src/forces.jl")
include("../../src/Neighbor.jl")

cu_knn (generic function with 1 method)

# Initial Conditions

In [2]:
R_agg = 10
t, t_knn = 0, 10
r_max, s = 2.5, 2
K = 10

nn = 20

@time X = Float32.(readdlm("../../data/init/Sphere/$R_agg.xyz")[3:end,2:end]) |> cu

  1.485584 seconds (3.52 M allocations: 205.832 MiB, 2.85% gc time, 22.80% compilation time)


746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -1.5  -2.89  -8.98
  0.5  -2.89  -8.98
  2.5  -2.89  -8.98
 -2.5  -1.15  -8.98
 -0.5  -1.15  -8.98
  1.5  -1.15  -8.98
  3.5  -1.15  -8.98
 -3.5   0.58  -8.98
 -1.5   0.58  -8.98
  0.5   0.58  -8.98
  2.5   0.58  -8.98
 -2.5   2.31  -8.98
 -0.5   2.31  -8.98
  ⋮           
  3.5  -2.31   8.98
 -3.5  -0.58   8.98
 -1.5  -0.58   8.98
  0.5  -0.58   8.98
  2.5  -0.58   8.98
 -2.5   1.15   8.98
 -0.5   1.15   8.98
  1.5   1.15   8.98
  3.5   1.15   8.98
 -1.5   2.89   8.98
  0.5   2.89   8.98
  2.5   2.89   8.98

# Using kNN

In [3]:
# Inizializate Variables for kNN
i_Cell = CuArray{Float32}(undef, (size(X, 1), size(X, 1), 3))
Dist = CuArray{Float32}(undef, (size(X, 1), size(X, 1)))
idx = hcat([[CartesianIndex(i,1) for i=1:nn] for j=1:size(X,1)]...) |> cu
cu_knn()
idx

20×746 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
  1   2   3   4   5   6   7   8   9  10  …  740  741  742  743  744  745  746
 23  24  25  34  35  36  37  41  42  43     709  710  711  712  722  723  724
 28  29  30  35  36  37  38  42  43  44     710  711  712  713  716  717  718
 29  30  31   8   9  10  11   4   4   5     736  737  738  739  717  718  719
  2   1   2   9  10  11   6  12   5   6     737  738  739  742  745  744  745
  4   3   6   5   4   5  31   9  12  13  …  741  740  741  719  740  746  742
  5   5   7  28   6   7   3  34  13  14     716  742  743  746  741  741  743
 22   6  24   1  29  30  45  48   8   9     744  717  718  706  721  742  723
 24  23  26  42   1   2  30  33  10  11     703  744  745  718  723  722  725
 35  25  37  27   2   3  32  35  35  36     715  745  746  720  710  724  712
 76  36  78  29  43  44  86  91  49  50  …  717  704  705  664  670  711  672
 18  77  20  83  28  29  44  40  34  35     661  716  717  705  726  671  728
 19  19  21  41

# CUDA Forces

## Define Variables for CUDA

In [4]:
@time X_Cu = X |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = idx |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.009923 seconds (19.27 k allocations: 1.261 MiB, 99.69% compilation time)
  0.059428 seconds (141.74 k allocations: 8.324 MiB, 90.45% compilation time)
  0.017631 seconds (136.57 k allocations: 8.115 MiB, 99.93% compilation time)
  0.099352 seconds (315.03 k allocations: 18.334 MiB, 99.88% compilation time)
  0.000060 seconds (12 allocations: 122.828 KiB)
  0.000097 seconds (12 allocations: 367.656 KiB)


## Generating Forces

### r

In [5]:
# Finding Distances
println("Finding Distances")
r = reshape(repeat(X, inner=(nn,1)), nn, size(X)[1], 3) - X[getindex.(idx,1),:]

Finding Distances


20×746×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
  0.0   0.0   0.0   0.0   0.0   0.0  …   0.0   0.0   0.0   0.0   0.0   0.0
  0.0   0.0   0.0   1.0   1.0   1.0      1.0   1.0   1.0   0.0   0.0   0.0
  1.0   1.0   1.0  -1.0  -1.0  -1.0     -1.0  -1.0  -1.0   1.0   1.0   1.0
 -1.0  -1.0  -1.0   1.0   1.0   1.0      1.0   1.0   1.0  -1.0  -1.0  -1.0
 -2.0   2.0   2.0  -1.0  -1.0  -1.0     -1.0  -1.0   2.0  -2.0   2.0   2.0
  1.0  -2.0   1.0  -2.0   2.0   2.0  …   2.0   2.0   0.0   1.0  -2.0   1.0
 -1.0   1.0  -1.0   0.0  -2.0  -2.0     -2.0  -2.0   1.0  -1.0   1.0  -1.0
  2.0  -1.0   2.0  -1.0   0.0   0.0      0.0   0.0   0.0   2.0  -1.0   2.0
 -2.0   2.0  -2.0   0.0   1.0   1.0      1.0   1.0   2.0  -2.0   2.0  -2.0
  0.0  -2.0   0.0   2.0  -1.0  -1.0     -1.0  -1.0  -2.0   0.0  -2.0   0.0
  0.0   0.0   0.0  -2.0   0.0   0.0  …   0.0   0.0   0.0   0.0   0.0   0.0
  1.0   0.0   1.0   0.0   2.0   2.0      2.0   2.0   2.0   1.0   0.0   1.0
 -1.0   1.0  -1.0   2.0  -2.0  -2.0

In [6]:
idx

20×746 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
  1   2   3   4   5   6   7   8   9  10  …  740  741  742  743  744  745  746
 23  24  25  34  35  36  37  41  42  43     709  710  711  712  722  723  724
 28  29  30  35  36  37  38  42  43  44     710  711  712  713  716  717  718
 29  30  31   8   9  10  11   4   4   5     736  737  738  739  717  718  719
  2   1   2   9  10  11   6  12   5   6     737  738  739  742  745  744  745
  4   3   6   5   4   5  31   9  12  13  …  741  740  741  719  740  746  742
  5   5   7  28   6   7   3  34  13  14     716  742  743  746  741  741  743
 22   6  24   1  29  30  45  48   8   9     744  717  718  706  721  742  723
 24  23  26  42   1   2  30  33  10  11     703  744  745  718  723  722  725
 35  25  37  27   2   3  32  35  35  36     715  745  746  720  710  724  712
 76  36  78  29  43  44  86  91  49  50  …  717  704  705  664  670  711  672
 18  77  20  83  28  29  44  40  34  35     661  716  717  705  726  671  728
 19  19  21  41

746×746 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 24  23  26  42   1   2  30  33  10  11  …  703  744  745  718  723  722  725
 22   6  24   1  29  30  45  48   8   9     744  717  718  706  721  742  723
 27  37  29  33  44  45  39  47  41  42     708  705  706  714  715  712  717
  5   5   7  28   6   7   3  34  13  14     716  742  743  746  741  741  743
 68   7  70  23   8   9   2  82  48  12     722  736  737  745  677  743  679
 22   6  24   1  29  30  45  48   8   9  …  744  717  718  706  721  742  723
  4   3   6   5   4   5  31   9  12  13     741  740  741  719  740  746  742
 76  36  78  29  43  44  86  91  49  50     717  704  705  664  670  711  672
 22   6  24   1  29  30  45  48   8   9     744  717  718  706  721  742  723
 68   7  70  23   8   9   2  82  48  12     722  736  737  745  677  743  679
 68   7  70  23   8   9   2  82  48  12  …  722  736  737  745  677  743  679
 27  37  29  33  44  45  39  47  41  42     708  705  706  714  715  712  717
  9   4  11  2

In [19]:
X[[rand(idx[2:end,i]) for i in 1:size(idx,2)],:]

746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.5  -2.89  -8.98
  1.5  -1.15  -8.98
  3.5  -2.31  -7.35
 -0.5  -1.15  -8.98
 -3.5  -0.58  -7.35
  2.5   0.58  -8.98
  3.5   2.31  -8.98
 -3.5   2.89  -7.35
  1.5   1.15  -7.35
  0.5   2.89  -7.35
  0.5   0.58  -8.98
 -4.5   4.62  -7.35
  1.5   1.15  -7.35
  ⋮           
  3.5  -2.31   5.72
 -5.5  -2.89   7.35
 -1.5  -0.58   5.72
  1.5  -2.31   8.98
  3.5   1.15   8.98
  0.5   0.58   7.35
 -1.5   0.58   7.35
  1.5   1.15   5.72
  4.5   4.04   7.35
  1.5   1.15   8.98
  2.5   2.89   8.98
  4.5   0.58   7.35

In [34]:
r_p = X - X[[rand(idx[2:end,i]) for i in 1:size(idx,2)],:]

746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.0   1.15  -1.63
  2.0   0.0    0.0
  1.0  -1.74   0.0
  0.0   1.16  -1.63
 -1.0  -0.57  -1.63
  2.0   1.16  -1.63
  2.0   1.16  -1.63
  0.0   0.0   -3.26
  1.0  -1.73   0.0
 -1.0   1.73   0.0
  3.0   1.73   0.0
  0.0   0.0   -3.26
  2.0  -2.31  -1.63
  ⋮           
  1.0  -1.73   3.26
 -3.0   1.73   0.0
 -1.0  -1.73   0.0
 -3.0   1.73   0.0
 -3.0   0.57   1.63
  3.0   0.57   1.63
  0.0   2.3    1.63
  0.0   2.3    1.63
  3.0   0.57   1.63
 -3.0   0.58   1.63
 -1.0  -2.88   1.63
  2.0  -1.15   1.63

: 

In [34]:
dist = ((sum(r .^ 2, dims=3)) .^ 0.5)[:,:,1]
dist = reshape(repeat((dist), outer=(1,3)) ,nn ,size(X)[1], 3)

20×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 0.0      0.0      0.0      0.0      …  0.0      0.0      0.0      0.0
 1.99484  1.99484  1.99484  1.99544     1.99544  1.99484  1.99484  1.99484
 1.99832  1.99832  1.99832  1.99544     1.99544  1.99832  1.99832  1.99832
 1.99832  1.99832  1.99832  1.99822     1.99822  1.99832  1.99832  1.99832
 2.0      2.0      2.0      1.99822     2.0      2.0      2.0      2.0
 2.00689  2.0      2.00689  2.0      …  2.00062  2.00689  2.0      2.00689
 2.00689  2.00689  2.00689  2.00062     2.00689  2.00689  2.00689  2.00689
 2.82478  2.00689  2.82478  2.00689     2.81902  2.82478  2.00689  2.82478
 2.82478  2.82478  2.82478  2.81902     2.82887  2.82478  2.82478  2.82478
 2.82719  2.82478  2.82719  2.82887     2.82887  2.82719  2.82478  2.82719
 3.26     2.82719  3.26     2.82887  …  3.26     3.26     2.82719  3.26
 3.45706  3.26     3.45706  3.26        3.45643  3.45706  3.26     3.45706
 3.45706  3.45706  3.45706  3.45643     3.4564

In [38]:
dist_p = (sum(r_p .^ 2, dims=2).^ 0.5)

746×1 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 3.8236759412314543
 3.4680830855663065
 3.8236759412314543
 3.4654001092478905
 3.463076651579395
 3.463076651579395
 3.4680830855663065
 3.8236760659379296
 3.4630911091852
 3.463076651579395
 3.4654001092478905
 3.4654001092478905
 3.4631342062615205
 ⋮
 3.8236760659379296
 3.8236760659379296
 3.4630911091852
 3.463076651579395
 3.4654001092478905
 3.4654001092478905
 3.463076651579395
 3.463076651579395
 3.4680830855663065
 3.8236759412314543
 3.4680830855663065
 3.8236759412314543

## F -> Forces

In [36]:
# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r ./ dist
F[dist .>  r_max] .= 0
F

Calculationg Forces
  0.000205 seconds (106 allocations: 13.172 KiB)


20×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 NaN           NaN           NaN           …  NaN           NaN
   0.0           0.0           0.0              0.0           0.0
   0.00211077    0.00211077    0.00211077       0.00211077    0.00211077
  -0.00211077   -0.00211077   -0.00211077      -0.00211077   -0.00211077
   0.0          -0.0          -0.0             -0.0          -0.0
  -0.00834592    0.0          -0.00834592  …    0.0          -0.00834592
   0.00834592   -0.00834592    0.00834592      -0.00834592    0.00834592
   0.0           0.00834592    0.0              0.00834592    0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0         …    0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0

# dX -> Final

In [40]:
dX = sum(F[2:end,:,:]; dims=1)[1,:,:]

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  0.0          0.0341819    -0.0176343
  0.0          0.0341819    -0.0176343
  0.0          0.0341819    -0.0176343
  0.00834592  -0.0297922    -0.0176778
  0.0         -0.0443141    -0.0176778
  0.0         -0.0443141    -0.0176778
 -0.00610844  -0.0259213    -0.0176778
 -0.00447495  -0.00752861   -0.0176778
  0.0         -0.00752861   -0.0176778
  0.0         -0.00752861   -0.0176778
  0.0         -0.00752861   -0.0176778
 -0.00223747   0.000519265  -0.00561216
  0.0         -0.00335157   -0.00561216
  ⋮                         
  0.00223747  -0.000519265   0.00561216
 -0.00447495   0.00752861    0.0176778
  0.0          0.00752861    0.0176778
  0.0          0.00752861    0.0176778
  0.0          0.00752861    0.0176778
  0.00834592   0.0297922     0.0176778
  0.0          0.0443141     0.0176778
  0.0          0.0443141     0.0176778
 -0.00610844   0.0259213     0.0176778
  0.0         -0.0341819     0.0176343
  0.0         -0.0341

In [41]:
fp = 0.001
dX = dX - fp .* (r_p ./ dist_p)

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 -0.000261528   0.0337294    -0.0167817
  0.000865031   0.0346836    -0.0176343
 -0.000261528   0.0337294    -0.0167817
  0.00863448   -0.0306261    -0.0172074
 -0.000866282  -0.0438145    -0.0176778
 -0.000866282  -0.0438145    -0.0176778
 -0.00697347   -0.0264231    -0.0176778
 -0.00473648   -0.00798106   -0.0168252
 -0.000577519  -0.00686158   -0.0172071
 -0.000866282  -0.00702906   -0.0176778
 -0.000288567  -0.00836257   -0.0172074
 -0.00252604   -0.000314693  -0.00514179
 -0.000866267  -0.00318409   -0.00514149
  ⋮                          
  0.00197595   -6.68209e-5    0.00475957
 -0.00473648    0.00707617    0.0168252
 -0.000577519   0.00686158    0.0172071
 -0.000866282   0.00802817    0.0176778
 -0.000288567   0.00836257    0.0172074
  0.00863448    0.0306261     0.0172074
 -0.000866282   0.0438145     0.0176778
 -0.000866282   0.0438145     0.0176778
 -0.00697347    0.0264231     0.0176778
 -0.000261528  -0.0337294     0.01678