# Installing Dependencies

In [77]:
using DelimitedFiles
using CUDA
using BenchmarkTools: @btime
include("../../../src/forces.jl")
include("../../../src/neighbor.jl")

cu_knn (generic function with 1 method)

# Initial Conditions

In [78]:
# Running fusion of the two aggregates
# Physical Conditions
R_agg = 10

# Time model Conditions
t_f = 100000
dt = 0.1

# Neighbor Conditions
n_knn = 50
nn = 12

# Forces Conditions
r_max = 2.8
fp = 0.002
K = 0.05
s = 2.0

# Coding C0nditions
n_text = 50
num_agg = 2

@time X = Float32.(readdlm("../../../data/init/Sphere/$R_agg.xyz")[3:end,2:end]) |> cu

  0.007678 seconds (12.02 k allocations: 406.844 KiB)


746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -1.5  -2.89  -8.98
  0.5  -2.89  -8.98
  2.5  -2.89  -8.98
 -2.5  -1.15  -8.98
 -0.5  -1.15  -8.98
  1.5  -1.15  -8.98
  3.5  -1.15  -8.98
 -3.5   0.58  -8.98
 -1.5   0.58  -8.98
  0.5   0.58  -8.98
  2.5   0.58  -8.98
 -2.5   2.31  -8.98
 -0.5   2.31  -8.98
  ⋮           
  3.5  -2.31   8.98
 -3.5  -0.58   8.98
 -1.5  -0.58   8.98
  0.5  -0.58   8.98
  2.5  -0.58   8.98
 -2.5   1.15   8.98
 -0.5   1.15   8.98
  1.5   1.15   8.98
  3.5   1.15   8.98
 -1.5   2.89   8.98
  0.5   2.89   8.98
  2.5   2.89   8.98

# Using kNN

In [79]:
# Inizializate Variables for kNN
i_Cell = CuArray{Float32}(undef, (size(X, 1), size(X, 1), 3))
Dist = CuArray{Float32}(undef, (size(X, 1), size(X, 1)))
idx = hcat([[CartesianIndex(i,1) for i=1:nn] for j=1:size(X,1)]...) |> cu
rand_idx = CuArray{Int32}(undef, n_knn, size(X,1))
cu_knn()
rand_idx

50×746 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 18  77  20  83  28  29  44  40  34  35  …  661  716  717  705  726  671  728
 23  24  25  34  35  36  37  41  42  43     709  710  711  712  722  723  724
  2   1   2   9  10  11   6  12   5   6     737  738  739  742  745  744  745
 24  23  26  42   1   2  30  33  10  11     703  744  745  718  723  722  725
 76  36  78  29  43  44  86  91  49  50     717  704  705  664  670  711  672
 22   6  24   1  29  30  45  48   8   9  …  744  717  718  706  721  742  723
  5   5   7  28   6   7   3  34  13  14     716  742  743  746  741  741  743
  5   5   7  28   6   7   3  34  13  14     716  742  743  746  741  741  743
  2   1   2   9  10  11   6  12   5   6     737  738  739  742  745  744  745
 35  25  37  27   2   3  32  35  35  36     715  745  746  720  710  724  712
 22   6  24   1  29  30  45  48   8   9  …  744  717  718  706  721  742  723
 23  24  25  34  35  36  37  41  42  43     709  710  711  712  722  723  724
  2   1   2   9

# CUDA Forces

## Define Variables for CUDA

In [80]:
@time X_Cu = X |> cu
@time dX_Cu = zeros(size(X)[1],3) |> cu
@time idx_Cu = idx |> cu
@time r = zeros(14,size(X)[1],3) |> cu
@time dist = zeros(14, size(X)[1]) |> cu
@time F = zeros(14, size(X)[1],3) |> cu;

  0.000004 seconds
  0.000066 seconds (11 allocations: 26.688 KiB)
  0.000004 seconds
  0.000088 seconds (12 allocations: 367.656 KiB)
  0.000049 seconds (12 allocations: 122.828 KiB)
  0.000067 seconds (12 allocations: 367.656 KiB)


## Generating Forces

### r

In [81]:
# Finding Distances
println("Finding Distances")
CUDA.@time r = reshape(repeat(X, inner=(nn,1)), nn, size(X)[1], 3) - X[getindex.(idx,1),:]

Finding Distances
  0.000211 seconds (158 CPU allocations: 8.188 KiB) (4 GPU allocations: 384.656 KiB, 10.21% memmgmt time)


12×746×3 CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
  0.0   0.0   0.0   0.0   0.0   0.0  …   0.0   0.0   0.0   0.0   0.0   0.0
  0.0   0.0   0.0   1.0   1.0   1.0      1.0   1.0   1.0   0.0   0.0   0.0
  1.0   1.0   1.0  -1.0  -1.0  -1.0     -1.0  -1.0  -1.0   1.0   1.0   1.0
 -1.0  -1.0  -1.0   1.0   1.0   1.0      1.0   1.0   1.0  -1.0  -1.0  -1.0
 -2.0   2.0   2.0  -1.0  -1.0  -1.0     -1.0  -1.0   2.0  -2.0   2.0   2.0
  1.0  -2.0   1.0  -2.0   2.0   2.0  …   2.0   2.0   0.0   1.0  -2.0   1.0
 -1.0   1.0  -1.0   0.0  -2.0  -2.0     -2.0  -2.0   1.0  -1.0   1.0  -1.0
  2.0  -1.0   2.0  -1.0   0.0   0.0      0.0   0.0   0.0   2.0  -1.0   2.0
 -2.0   2.0  -2.0   0.0   1.0   1.0      1.0   1.0   2.0  -2.0   2.0  -2.0
  0.0  -2.0   0.0   2.0  -1.0  -1.0     -1.0  -1.0  -2.0   0.0  -2.0   0.0
  0.0   0.0   0.0  -2.0   0.0   0.0  …   0.0   0.0   0.0   0.0   0.0   0.0
  1.0   0.0   1.0   0.0   2.0   2.0      2.0   2.0   2.0   1.0   0.0   1.0

[:, :, 2] =
  0.0    0.0    0.0   

In [82]:
t=5
CUDA.@time r_p = X - X[rand_idx[mod(t, n_knn)+1, :], :]

  0.000145 seconds (115 CPU allocations: 5.391 KiB) (3 GPU allocations: 23.312 KiB, 10.97% memmgmt time)


746×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  2.0   1.15  -1.63
 -1.0  -1.74   0.0
  2.0   1.15  -1.63
 -1.0   1.74   0.0
  0.0   1.16  -1.63
  0.0   1.16  -1.63
  0.0  -2.3   -1.63
  0.0  -2.31  -1.63
  2.0   0.0    0.0
  2.0   0.0    0.0
  2.0   0.0    0.0
  0.0   1.16  -1.63
  2.0   0.0    0.0
  ⋮           
  2.0  -1.16   1.63
  0.0   2.31   1.63
  2.0   0.0    0.0
  2.0   0.0    0.0
  2.0   0.0    0.0
 -1.0  -1.74   0.0
  0.0  -1.16   1.63
  0.0  -1.16   1.63
  0.0   2.3    1.63
  2.0  -1.15   1.63
 -1.0   1.74   0.0
  2.0  -1.15   1.63

In [83]:
CUDA.@time dist = ((sum(r .^ 2, dims=3)) .^ 0.5)[:,:,1]
CUDA.@time dist = reshape(repeat((dist), outer=(1,3)) ,nn ,size(X)[1], 3)

  0.000209 seconds (176 CPU allocations: 8.625 KiB) (4 GPU allocations: 279.750 KiB, 8.44% memmgmt time)
  0.000074 seconds (40 CPU allocations: 1.875 KiB) (1 GPU allocation: 209.812 KiB, 14.89% memmgmt time)


12×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 0.0      0.0      0.0      0.0      …  0.0      0.0      0.0      0.0
 1.99484  1.99484  1.99484  1.99544     1.99544  1.99484  1.99484  1.99484
 1.99832  1.99832  1.99832  1.99544     1.99544  1.99832  1.99832  1.99832
 1.99832  1.99832  1.99832  1.99822     1.99822  1.99832  1.99832  1.99832
 2.0      2.0      2.0      1.99822     2.0      2.0      2.0      2.0
 2.00689  2.0      2.00689  2.0      …  2.00062  2.00689  2.0      2.00689
 2.00689  2.00689  2.00689  2.00062     2.00689  2.00689  2.00689  2.00689
 2.82478  2.00689  2.82478  2.00689     2.81902  2.82478  2.00689  2.82478
 2.82478  2.82478  2.82478  2.81902     2.82887  2.82478  2.82478  2.82478
 2.82719  2.82478  2.82719  2.82887     2.82887  2.82719  2.82478  2.82719
 3.26     2.82719  3.26     2.82887  …  3.26     3.26     2.82719  3.26
 3.45706  3.26     3.45706  3.26        3.45643  3.45706  3.26     3.45706

[:, :, 2] =
 0.0      0.0      0.0      0.0  

In [84]:
dist_p = (sum(r_p .^ 2, dims=2).^ 0.5)

746×1 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 2.8247829522667742
 2.0068882102377086
 2.8247829522667742
 2.0068882102377086
 2.00062455915871
 2.00062455915871
 2.819024380949584
 2.827189422468466
 2.0
 2.0
 2.0
 2.00062455915871
 2.0
 ⋮
 2.8288687892351927
 2.827189422468466
 2.0
 2.0
 2.0
 2.0068882102377086
 2.00062455915871
 2.00062455915871
 2.819024380949584
 2.8247829522667742
 2.0068882102377086
 2.8247829522667742

## F -> Forces

In [85]:
# dX[i,:] +=  r/dist * F
println("Calculationg Forces")
@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r ./ dist
F[dist .>  r_max] .= 0
F

Calculationg Forces
  0.930259 seconds (3.42 M allocations: 194.840 MiB, 6.88% gc time, 74.24% compilation time)


12×746×3 CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}:
[:, :, 1] =
 NaN           NaN           NaN           …  NaN           NaN
   0.0           0.0           0.0              0.0           0.0
   2.69503e-5    2.69503e-5    2.69503e-5       2.69503e-5    2.69503e-5
  -2.69503e-5   -2.69503e-5   -2.69503e-5      -2.69503e-5   -2.69503e-5
   0.0          -0.0          -0.0             -0.0          -0.0
  -0.00010795    0.0          -0.00010795  …    0.0          -0.00010795
   0.00010795   -0.00010795    0.00010795      -0.00010795    0.00010795
   0.0           0.00010795    0.0              0.00010795    0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0              0.0           0.0
   0.0           0.0           0.0         …    0.0           0.0
   0.0           0.0           0.0              0.0           0.0

[:, :, 2] =
 NaN            NaN            NaN            …  NaN            NaN
   9.63662e-5     9.63662e-5     9.6

# dX -> Final

In [86]:
dX = sum(F[2:end,:,:]; dims=1)[1,:,:]

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  0.0          0.000440769  -0.000224446
  0.0          0.000440769  -0.000224446
  0.0          0.000440769  -0.000224446
  0.00010795  -0.000382467  -0.000224621
  0.0         -0.0005703    -0.000224621
  0.0         -0.0005703    -0.000224621
 -7.93861e-5  -0.000333052  -0.000224621
 -5.71274e-5  -9.58038e-5   -0.000224621
  0.0         -9.58038e-5   -0.000224621
  0.0         -9.58038e-5   -0.000224621
  0.0         -9.58038e-5   -0.000224621
 -2.85637e-5   6.5828e-6    -7.15998e-5
  0.0         -4.28324e-5   -7.15998e-5
  ⋮                         
  2.85637e-5  -6.5828e-6     7.15998e-5
 -5.71274e-5   9.58038e-5    0.000224621
  0.0          9.58038e-5    0.000224621
  0.0          9.58038e-5    0.000224621
  0.0          9.58038e-5    0.000224621
  0.00010795   0.000382467   0.000224621
  0.0          0.0005703     0.000224621
  0.0          0.0005703     0.000224621
 -7.93861e-5   0.000333052   0.000224621
  0.0         -0.00044

In [87]:
fp = 0.001
dX = dX - fp .* (r_p ./ dist_p)

746×3 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
 -0.000708019   3.36585e-5    0.000352589
  0.000498284   0.00130778   -0.000224446
 -0.000708019   3.36585e-5    0.000352589
  0.000606234  -0.00124948   -0.000224621
  0.0          -0.00115012    0.000590125
  0.0          -0.00115012    0.000590125
 -7.93861e-5    0.000482833   0.000353593
 -5.71274e-5    0.000721262   0.000351923
 -0.001        -9.58038e-5   -0.000224621
 -0.001        -9.58038e-5   -0.000224621
 -0.001        -9.58038e-5   -0.000224621
 -2.85637e-5   -0.000573236   0.000743146
 -0.001        -4.28324e-5   -7.15998e-5
  ⋮                          
 -0.000678433   0.000403475  -0.000504602
 -5.71274e-5   -0.000721262  -0.000351923
 -0.001         9.58038e-5    0.000224621
 -0.001         9.58038e-5    0.000224621
 -0.001         9.58038e-5    0.000224621
  0.000606234   0.00124948    0.000224621
  0.0           0.00115012   -0.000590125
  0.0           0.00115012   -0.000590125
 -7.93861e-5   -0.000482833  -0.0003535

: 