# Installing Dependencies

In [21]:
using DelimitedFiles
using CUDA

# Initial Conditions

## Initial Variables

In [22]:
# Physical Conditions
const R_Agg = 13
const t_f = 100

# Time model Conditions
const dt = 0.5
const n_knn = 10

# Constants for Force Model
const r_max, s = 3, 1
const K = 1

function init()
    # Initial Coordinates of Aggregates
    CUDA.@time global X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float32, header=true)[1][:, 1:3] |> cu


    # Inizializate Variables for kNN
    println("Inizializate Variables for kNN")
    CUDA.@time global i_Cell = CuArray{Float32}(undef, (size(X, 1), size(X, 1), 3))
    CUDA.@time global Dist = CuArray{Float32}(undef, (size(X, 1), size(X, 1)))
    CUDA.@time global idx = hcat([[CartesianIndex(i,1) for i=1:14] for j=1:size(X,1)]...) |> cu

    # Inizializate Variables for Forces
    println("Inizializate Variables for Forces")
    CUDA.@time global r = zeros(14,size(X)[1],3) |> cu
    CUDA.@time global dist = zeros(14, size(X)[1]) |> cu
    CUDA.@time global F = zeros(14, size(X)[1],3) |> cu
    CUDA.@time global dX = zeros(size(X)[1],3) |> cu;
end

init (generic function with 1 method)

## Function for knn in CUDA

In [23]:
function knn_cu()
    # Definig Variables for calculing knn
    global i_Cell; global Dist; global idx
    
    # Defining Coordinates of each cell on the aggregates
    println("Defining Coordinates of each cell on the aggregates")
    CUDA.@time i_Cell = reshape(repeat(X, size(X ,1)), size(X ,1), size(X ,1), 3) - reshape(repeat(X, inner=(size(X ,1),1)), size(X ,1), size(X ,1), 3)

    # Calculating Norm on every cell on the aggregate
    println("Calculating distances on every cell on the aggregate")
    CUDA.@time Dist = sqrt.(i_Cell[:,:,1] .^ 2 + i_Cell[:,:,2] .^ 2 + i_Cell[:,:,3] .^ 2)
    i_Cell = nothing; GC.gc(true)

    # Calculating index of knof each cell in the aggregate
    println("Calculating index of knn on each cell in the aggregate")
    CUDA.@time for i = 1:14
        idx[i,:] = findmin(Dist; dims=1)[2]
        Dist[idx[i,:]] .= Inf
    end
    Dist = nothing; GC.gc(true)

    synchronize()
end

knn_cu (generic function with 1 method)

## Function for forces in CUDA

In [24]:
function forces()

    # Definig Variables for calculing dX
    global X; global dX; global idx

    # Finding Distances
    println("Finding Distances")
    CUDA.@time r = reshape(repeat(X, inner=(14,1)), 14, size(X)[1], 3) - X[getindex.(idx,1),:]

    # Finding Distances/Norm
    println("Finding Distances/Norm")
    CUDA.@time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)
    CUDA.@time dist = reshape(repeat((dist), outer=(1,3)) ,14 ,size(X)[1], 3)
    
    # Normalizationg Distances
    println("Normalizationg Distances")
    CUDA.@time r = r ./ dist

    # dX[i,:] +=  r/dist * F
    println("Calculationg Forces")
    CUDA.@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r

    # Deleting Forces greater than R_Max
    println("Deleting Forces greater than R_Max")
    CUDA.@time F[dist .>  r_max] .= 0

    # Calculating de dX
    println("Calculating dX")
    CUDA.@time dX[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
    CUDA.@time dX[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
    CUDA.@time dX[:,3] = sum(F[:,:,3][2:end,:]; dims=1)

    synchronize()
end

forces (generic function with 1 method)

# Starting Functions

In [25]:
init()

  0.007534 seconds (295.47 k CPU allocations: 7.882 MiB) (1 GPU allocation: 37.969 KiB, 0.17% memmgmt time)
Inizializate Variables for kNN
  0.000009 seconds (8 CPU allocations: 256 bytes) (1 GPU allocation: 120.135 MiB, 53.67% memmgmt time)
  0.000004 seconds (8 CPU allocations: 240 bytes) (1 GPU allocation: 40.045 MiB, 54.55% memmgmt time)
  0.030330 seconds (115.17 k CPU allocations: 7.998 MiB) (1 GPU allocation: 708.750 KiB, 0.04% memmgmt time)
Inizializate Variables for Forces
  0.000223 seconds (13 CPU allocations: 1.558 MiB) (1 GPU allocation: 531.562 KiB, 1.86% memmgmt time)
  0.000078 seconds (13 CPU allocations: 531.953 KiB) (1 GPU allocation: 177.188 KiB, 4.07% memmgmt time)
  0.000265 seconds (13 CPU allocations: 1.558 MiB) (1 GPU allocation: 531.562 KiB, 0.78% memmgmt time)
  0.000027 seconds (13 CPU allocations: 114.328 KiB) (1 GPU allocation: 37.969 KiB, 6.99% memmgmt time)


3240×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 ⋮         
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0

In [26]:
knn_cu()

Defining Coordinates of each cell on the aggregates
  0.003547 seconds (109 CPU allocations: 5.531 KiB) (3 GPU allocations: 360.406 MiB, 0.85% memmgmt time)
Calculating distances on every cell on the aggregate
  0.003161 seconds (226 CPU allocations: 10.422 KiB) (9 GPU allocations: 360.406 MiB, 14.72% memmgmt time)
Calculating index of knn on each cell in the aggregate


  0.009516 seconds (2.35 k CPU allocations: 821.562 KiB) (56 GPU allocations: 2.596 MiB, 1.19% memmgmt time)


In [27]:
forces()

Finding Distances
  0.000170 seconds (72 CPU allocations: 3.234 KiB) (4 GPU allocations: 1.903 MiB, 13.18% memmgmt time)
Finding Distances/Norm
  0.000181 seconds (177 CPU allocations: 7.484 KiB) (9 GPU allocations: 1.730 MiB, 5.20% memmgmt time)
  0.000022 seconds (20 CPU allocations: 896 bytes) (1 GPU allocation: 1.038 MiB, 7.07% memmgmt time)
Normalizationg Distances
  0.000065 seconds (18 CPU allocations: 832 bytes) (1 GPU allocation: 1.038 MiB, 3.19% memmgmt time)
Calculationg Forces
  0.000091 seconds (92 CPU allocations: 9.453 KiB) (1 GPU allocation: 1.038 MiB, 2.84% memmgmt time)
Deleting Forces greater than R_Max
  0.000579 seconds (269 CPU allocations: 151.734 KiB) (4 GPU allocations: 1.305 MiB, 2.07% memmgmt time)
Calculating dX
  0.000074 seconds (73 CPU allocations: 3.188 KiB) (3 GPU allocations: 708.750 KiB, 5.68% memmgmt time)
  0.000046 seconds (73 CPU allocations: 3.188 KiB) (3 GPU allocations: 708.750 KiB, 6.25% memmgmt time)
  0.000044 seconds (73 CPU allocations: 3.

In [31]:

for i in 0:Int(t_f/dt)
    if mod(i, n_knn) == 0
        println("kNN")
    end
    println(i*dt)
end

kNN
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
kNN
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
kNN
10.0
10.5
11.0
11.5
12.0
12.5
13.0
13.5
14.0
14.5
kNN
15.0
15.5
16.0
16.5
17.0
17.5
18.0
18.5
19.0
19.5
kNN
20.0
20.5
21.0
21.5
22.0
22.5
23.0
23.5
24.0
24.5
kNN
25.0
25.5
26.0
26.5
27.0
27.5
28.0
28.5
29.0
29.5
kNN
30.0
30.5
31.0
31.5
32.0
32.5
33.0
33.5
34.0
34.5
kNN
35.0
35.5
36.0
36.5
37.0
37.5
38.0
38.5
39.0
39.5
kNN
40.0
40.5
41.0
41.5
42.0
42.5
43.0
43.5
44.0
44.5
kNN
45.0
45.5
46.0
46.5
47.0
47.5
48.0
48.5
49.0
49.5
kNN
50.0
50.5
51.0
51.5
52.0
52.5
53.0
53.5
54.0
54.5
kNN
55.0
55.5
56.0
56.5
57.0
57.5
58.0
58.5
59.0
59.5
kNN
60.0
60.5
61.0
61.5
62.0
62.5
63.0
63.5
64.0
64.5
kNN
65.0
65.5
66.0
66.5
67.0
67.5
68.0
68.5
69.0
69.5
kNN
70.0
70.5
71.0
71.5
72.0
72.5
73.0
73.5
74.0
74.5
kNN
75.0
75.5
76.0
76.5
77.0
77.5
78.0
78.5
79.0
79.5
kNN
80.0
80.5
81.0
81.5
82.0
82.5
83.0
83.5
84.0
84.5
kNN
85.0
85.5
86.0
86.5
87.0
87.5
88.0
88.5
89.0
89.5
kNN
90.0
90.5
91.0
91.5
92.0
92.5
93.0
93.5
94.0

In [29]:
CUDA.memory_status()

Effective GPU memory usage: 12.92% (1.520 GiB/11.767 GiB)
Memory pool usage: 171.248 MiB (1.031 GiB reserved)

In [30]:
idx

14×3240 CuArray{CartesianIndex{2}, 2, CUDA.Mem.DeviceBuffer}:
 CartesianIndex(1, 1)   CartesianIndex(2, 2)   …  CartesianIndex(3240, 3240)
 CartesianIndex(4, 1)   CartesianIndex(5, 2)      CartesianIndex(3237, 3240)
 CartesianIndex(5, 1)   CartesianIndex(6, 2)      CartesianIndex(3238, 3240)
 CartesianIndex(2, 1)   CartesianIndex(1, 2)      CartesianIndex(3239, 3240)
 CartesianIndex(31, 1)  CartesianIndex(32, 2)     CartesianIndex(3211, 3240)
 CartesianIndex(24, 1)  CartesianIndex(25, 2)  …  CartesianIndex(3217, 3240)
 CartesianIndex(25, 1)  CartesianIndex(26, 2)     CartesianIndex(3218, 3240)
 CartesianIndex(30, 1)  CartesianIndex(31, 2)     CartesianIndex(3212, 3240)
 CartesianIndex(32, 1)  CartesianIndex(33, 2)     CartesianIndex(3210, 3240)
 CartesianIndex(19, 1)  CartesianIndex(20, 2)     CartesianIndex(3222, 3240)
 CartesianIndex(85, 1)  CartesianIndex(86, 2)  …  CartesianIndex(3156, 3240)
 CartesianIndex(8, 1)   CartesianIndex(9, 2)      CartesianIndex(3233, 3240)
 CartesianInde