# Installing Dependencies

In [5]:
using DelimitedFiles
using CUDA

# Initial Conditions

## Initial Variables

In [6]:
# Physical Conditions
const R_Agg = 13
const t_f = 100

# Time model Conditions
const dt = 0.5
const n_knn = 10

# Constants for Force Model
const r_max, s = 3, 1
const K = 1

function init()
    # Initial Coordinates of Aggregates
    CUDA.@time global X = readdlm("../data/Init/Two_Sphere/$R_Agg.csv", ',', Float32, header=true)[1][:, 1:3] |> cu


    # Inizializate Variables for kNN
    println("Inizializate Variables for kNN")
    CUDA.@time global i_Cell = CuArray{Float32}(undef, (size(X, 1), size(X, 1), 3))
    CUDA.@time global Dist = CuArray{Float32}(undef, (size(X, 1), size(X, 1)))
    CUDA.@time global idx = hcat([[CartesianIndex(i,1) for i=1:14] for j=1:size(X,1)]...) |> cu

    # Inizializate Variables for Forces
    println("Inizializate Variables for Forces")
    CUDA.@time global r = zeros(14,size(X)[1],3) |> cu
    CUDA.@time global dist = zeros(14, size(X)[1]) |> cu
    CUDA.@time global F = zeros(14, size(X)[1],3) |> cu
    CUDA.@time global dX = zeros(size(X)[1],3) |> cu;
end

init (generic function with 1 method)

## Function for knn in CUDA

In [7]:
function knn_cu()
    # Definig Variables for calculing knn
    global i_Cell; global Dist; global idx
    
    # Defining Coordinates of each cell on the aggregates
    println("Defining Coordinates of each cell on the aggregates")
    CUDA.@time i_Cell = reshape(repeat(X, size(X ,1)), size(X ,1), size(X ,1), 3) - reshape(repeat(X, inner=(size(X ,1),1)), size(X ,1), size(X ,1), 3)

    # Calculating Norm on every cell on the aggregate
    println("Calculating distances on every cell on the aggregate")
    CUDA.@time Dist = sqrt.(i_Cell[:,:,1] .^ 2 + i_Cell[:,:,2] .^ 2 + i_Cell[:,:,3] .^ 2)

    # Calculating index of knof each cell in the aggregate
    println("Calculating index of knn on each cell in the aggregate")
    CUDA.@time for i = 1:14
        idx[i,:] = findmin(Dist; dims=1)[2]
        Dist[idx[i,:]] .= Inf
    end
end

knn_cu (generic function with 1 method)

## Function for forces in CUDA

In [8]:
function forces()

    # Definig Variables for calculing dX
    global X; global dX; global idx

    # Finding Distances
    println("Finding Distances")
    CUDA.@time r = reshape(repeat(X, inner=(14,1)), 14, size(X)[1], 3) - X[getindex.(idx,1),:]

    # Finding Distances/Norm
    println("Finding Distances/Norm")
    CUDA.@time dist = (r[:,:,1] .^ 2 + r[:,:,2] .^ 2 + r[:,:,3] .^ 2) .^ (0.5)
    CUDA.@time dist = reshape(repeat((dist), outer=(1,3)) ,14 ,size(X)[1], 3)
    
    # Normalizationg Distances
    println("Normalizationg Distances")
    CUDA.@time r = r ./ dist

    # dX[i,:] +=  r/dist * F
    println("Calculationg Forces")
    CUDA.@time F = -K.*((dist .- r_max).^2) .* (dist .- s) .* r

    # Deleting Forces greater than R_Max
    println("Deleting Forces greater than R_Max")
    CUDA.@time F[dist .>  r_max] .= 0

    # Calculating de dX
    println("Calculating dX")
    CUDA.@time dX[:,1] = sum(F[:,:,1][2:end,:]; dims=1)
    CUDA.@time dX[:,2] = sum(F[:,:,2][2:end,:]; dims=1)
    CUDA.@time dX[:,3] = sum(F[:,:,3][2:end,:]; dims=1)

    # Euler Step for all Cells
    dX
end

forces (generic function with 1 method)

# Starting Functions

In [9]:
init()

  0.355455 seconds (682.59 k CPU allocations: 

28.392 MiB) (1 GPU allocation: 37.969 KiB, 15.33% memmgmt time)
Inizializate Variables for kNN
  0.002984 seconds (1.41 k CPU allocations: 80.974 KiB) (1 GPU allocation: 120.135 MiB, 7.73% memmgmt time)
  0.000116 seconds (8 CPU allocations: 

240 bytes) (1 GPU allocation: 40.045 MiB, 98.28% memmgmt time)


  5.071895 seconds (795.80 k CPU allocations: 44.407 MiB, 0.17% gc time) (1 GPU allocation: 708.750 KiB, 0.00% memmgmt time)
Inizializate Variables for Forces
  0.007872 seconds (11.62 k CPU allocations: 2.175 MiB) (1 GPU allocation: 531.562 KiB, 0.12% memmgmt time)
  0.005940 seconds (11.34 k CPU allocations: 1.125 MiB) (1 GPU allocation: 177.188 KiB, 0.17% memmgmt time)
  0.000204 seconds (13 CPU allocations: 1.558 MiB) (1 GPU allocation: 531.562 KiB, 1.82% memmgmt time)
  0.000030 seconds (13 CPU allocations: 114.328 KiB) (1 GPU allocation: 37.969 KiB, 7.54% memmgmt time)


3240×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 ⋮         
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0

In [10]:
knn_cu()

ErrorException: syntax: missing condition in "if" at /home/nicomosty/Documentos/Maestria-GitHub/Centre-Julia/test/Test_CUDA_1.ipynb:1

In [11]:
forces()

Finding Distances


 12.906229 seconds (38.28 M CPU allocations: 2.016 GiB, 6.10% gc time) (4 GPU allocations: 1.903 MiB, 0.00% memmgmt time)
Finding Distances/Norm


  3.469444 seconds (13.34 M CPU allocations: 718.275 MiB, 5.04% gc time) (9 GPU allocations: 1.730 MiB, 0.00% memmgmt time)


  0.770504 seconds (2.99 M CPU allocations: 160.464 MiB, 5.38% gc time) (1 GPU allocation: 1.038 MiB, 0.00% memmgmt time)
Normalizationg Distances


  0.737051 seconds (2.91 M CPU allocations: 155.154 MiB, 6.60% gc time) (1 GPU allocation: 1.038 MiB, 0.00% memmgmt time)
Calculationg Forces


  0.810148 seconds (3.10 M CPU allocations: 166.779 MiB, 5.71% gc time) (1 GPU allocation: 1.038 MiB, 0.00% memmgmt time)
Deleting Forces greater than R_Max


  6.470305 seconds (25.88 M CPU allocations: 1.342 GiB, 5.79% gc time) (4 GPU allocations: 4.274 MiB, 0.00% memmgmt time)
Calculating dX


  3.958211 seconds (16.71 M CPU allocations: 891.010 MiB, 9.09% gc time) (3 GPU allocations: 708.750 KiB, 0.00% memmgmt time)
  0.000135 seconds (72 CPU allocations: 3.125 KiB) (3 GPU allocations: 708.750 KiB, 16.17% memmgmt time)
  0.000095 seconds (72 CPU allocations: 3.125 KiB) (3 GPU allocations: 708.750 KiB, 10.83% memmgmt time)


3240×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
   1.0    1.73461    0.0
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
 NaN    NaN        NaN
   ⋮               
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0
   0.0    0.0        0.0

In [12]:
CUDA.memory_status()

Effective GPU memory usage: 5.94% (716.125 MiB/11.767 GiB)
Memory pool usage: 163.196 MiB (192.000 MiB reserved)

In [13]:
for i in 0:Int(t_f/dt)
    if mod(i, n_knn) == 0
        println("kNN")
    else
        println(i*dt)
    end
end

kNN
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
kNN
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
kNN
10.5
11.0
11.5
12.0
12.5
13.0
13.5
14.0
14.5
kNN
15.5
16.0
16.5
17.0
17.5
18.0
18.5
19.0
19.5
kNN
20.5
21.0
21.5
22.0
22.5
23.0
23.5
24.0
24.5
kNN
25.5
26.0
26.5
27.0
27.5
28.0
28.5
29.0
29.5
kNN
30.5
31.0
31.5
32.0
32.5
33.0
33.5
34.0
34.5
kNN
35.5
36.0
36.5
37.0
37.5
38.0
38.5
39.0
39.5
kNN
40.5
41.0
41.5
42.0
42.5
43.0
43.5
44.0
44.5
kNN
45.5
46.0
46.5
47.0
47.5
48.0
48.5
49.0
49.5
kNN
50.5
51.0
51.5
52.0
52.5
53.0
53.5
54.0
54.5
kNN
55.5
56.0
56.5
57.0
57.5
58.0
58.5
59.0
59.5
kNN
60.5
61.0
61.5
62.0
62.5
63.0
63.5
64.0
64.5
kNN
65.5
66.0
66.5
67.0
67.5
68.0
68.5
69.0
69.5
kNN
70.5
71.0
71.5
72.0
72.5
73.0
73.5
74.0
74.5
kNN
75.5
76.0
76.5
77.0
77.5
78.0
78.5
79.0
79.5
kNN
80.5
81.0
81.5
82.0
82.5
83.0
83.5
84.0
84.5
kNN
85.5
86.0
86.5
87.0
87.5
88.0
88.5
89.0
89.5
kNN
90.5
91.0
91.5
92.0
92.5
93.0
93.5
94.0
94.5
kNN
95.5
96.0
96.5
97.0
97.5
98.0
98.5
99.0
99.5
kNN
