# Dependences

In [1]:
include("../../src/struct_data.jl")
include("../../src/neighbor.jl")
include("../../src/forces/forces.jl")

cu_force (generic function with 1 method)

# Model

In [2]:
@time Model = ModelSet(
    TimeModel(
        tₛᵢₘ  = 10000.0,
        dt    = 0.5,
        nₖₙₙ  = 50,
        nₛₐᵥₑ = 100
    ),
    InputModel(
        outer_ratio = 0.8,
        path_input = "../../data/init/Sphere"
    )
)
dump(Model)

  0.000001 seconds (3 allocations: 112 bytes)
ModelSet
  Time: TimeModel
    tₛᵢₘ: Float64 10000.0
    dt: Float64 0.5
    nₖₙₙ: Int64 50
    nₛₐᵥₑ: Int64 100
  Input: InputModel
    outer_ratio: Float64 0.8
    path_input: String "../../data/init/Sphere"


# Aggregate

In [3]:
@time @start_agg FusionAGG = FusionAggregate(
    [
        AggType(
            "HEK_1", 
            InteractionPar(
                Cubic(0.01,2.0,3.0), 
                ContractilePar(0.001)
            ),
            Float64.(readdlm("../../data/init/Sphere/15.0.xyz")[3:end,2:end]) |> cu
        ),
        AggType(
            "HEK_2", 
            InteractionPar(
                Cubic(0.01,2.0,3.0), 
                ContractilePar(0.001)
            ),
            Float64.(readdlm("../../data/init/Sphere/15.0.xyz")[3:end,2:end]) |> cu
        )
    ], 
    Model
)
show_aggregates(FusionAGG)

  7.393977 seconds (13.11 M allocations: 1.133 GiB, 3.66% gc time, 4.89% compilation time)


2-element Vector{AggType}:
 AggType("HEK_1", InteractionPar(Cubic{Float64}(0.01, 2.0, 3.0), ContractilePar(0.001)), 15.27f0, Float32[-1.5 -4.62 -13.88; 0.5 -4.62 -13.88; … ; 0.5 4.62 13.88; 2.5 4.62 13.88], CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
 AggType("HEK_2", InteractionPar(Cubic{Float64}(0.01, 2.0, 3.0), ContractilePar(0.001)), 15.27f0, Float32[-1.5 -4.62 -13.88; 0.5 -4.62 -13.88; … ; 0.5 4.62 13.88; 2.5 4.62 13.88], CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})

Index of List of Aggregates


1×5008 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  2  2  2  2  2  2  2  2  2  2  2  2

Index of Number of Aggregates


1×5008 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  2  2  2  2  2  2  2  2  2  2  2  2

Index of Name of Aggregates


1×5008 Matrix{String}:
 "HEK_1"  "HEK_1"  "HEK_1"  "HEK_1"  …  "HEK_2"  "HEK_2"  "HEK_2"  "HEK_2"



5008×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -16.77  -4.62  -13.88
 -14.77  -4.62  -13.88
 -12.77  -4.62  -13.88
 -19.77  -2.89  -13.88
 -17.77  -2.89  -13.88
 -15.77  -2.89  -13.88
 -13.77  -2.89  -13.88
 -11.77  -2.89  -13.88
 -20.77  -1.15  -13.88
 -18.77  -1.15  -13.88
 -16.77  -1.15  -13.88
 -14.77  -1.15  -13.88
 -12.77  -1.15  -13.88
   ⋮            
  13.77   1.15   13.88
  15.77   1.15   13.88
  17.77   1.15   13.88
  19.77   1.15   13.88
  10.77   2.89   13.88
  12.77   2.89   13.88
  14.77   2.89   13.88
  16.77   2.89   13.88
  18.77   2.89   13.88
  13.77   4.62   13.88
  15.77   4.62   13.88
  17.77   4.62   13.88

Radius_agg


1×5008 Matrix{Float32}:
 15.27  15.27  15.27  15.27  15.27  …  15.27  15.27  15.27  15.27  15.27

Outline


1×5008 Matrix{Int64}:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  1  1  1  1  1  1  1  1  1  1  1  1

Outer/Total = 0.46325878594249204
---------------------- Parameter --------------------
Force


Cubic{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}(Float32[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01  …  0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01], Float32[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0  …  2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], Float32[3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0  …  3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0])

Contractile


ContractilePar(Float32[0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001  …  0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001])

Radius


2-element CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}:
 15.27
 15.27

------------------ Neighbors Size -------------------
idx      = (5008, 5008)
idx_red  = (21, 5008)
idx_sum  = (1, 5008)
idx_cont = (50, 5008)
------------------- Forces Size ---------------------
dX       = (5008, 3)
F        = (5008, 3)


# kNN

In [4]:
println("------------------------ SIZE ------------------------")
println("r_max         = $(FusionAGG.Type[1].Interaction.Force.rₘₐₓ)")
println("Size X        = $(size(FusionAGG.Position))")
println("Size idx      = $(size(FusionAGG.Simulation.Neighbor.idx))")
println("Size idx_real  = $(size(FusionAGG.Simulation.Neighbor.idx))")
println("Size idx_sum   = $(size(FusionAGG.Simulation.Neighbor.idx_sum))")
println("Size idx_red   = $(size(FusionAGG.Simulation.Neighbor.idx_red))")
@time nearest_neighbors(FusionAGG)
println("------------------------ IDX -------------------------")
println("idx")
display(FusionAGG.Simulation.Neighbor.idx)
println("idx_red")
display(FusionAGG.Simulation.Neighbor.idx_red)
println("idx_sum")
display(FusionAGG.Simulation.Neighbor.idx_sum)
println("idx_cont")
display(FusionAGG.Simulation.Neighbor.idx_cont)

------------------------ SIZE ------------------------
r_max         = 3.0
Size X        = (5008, 3)
Size idx      = (5008, 5008)
Size idx_real  = (5008, 5008)
Size idx_sum   = (1, 5008)
Size idx_red   = (21, 5008)
Threads = (32, 32) | Blocks  = (157, 157)
Threads = 256 | Blocks  = 20
Threads = (32, 32) | Blocks  = (157, 157)
 19.878302 seconds (31.21 M allocations: 1.773 GiB, 3.23% gc time, 17.23% compilation time)
------------------------ IDX -------------------------
idx


5008×5008 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 1  1  0   0   1   1   0   0   0   0  …     0     0     0     0     0     0
 2  2  2   0   0   2   2   0   0   0        0     0     0     0     0     0
 0  3  3   0   0   0   3   3   0   0        0     0     0     0     0     0
 0  0  0   4   4   0   0   0   4   4        0     0     0     0     0     0
 5  0  0   5   5   5   0   0   0   5        0     0     0     0     0     0
 6  6  0   0   6   6   6   0   0   0  …     0     0     0     0     0     0
 0  7  7   0   0   7   7   7   0   0        0     0     0     0     0     0
 0  0  8   0   0   0   8   8   0   0        0     0     0     0     0     0
 0  0  0   9   0   0   0   0   9   9        0     0     0     0     0     0
 0  0  0  10  10   0   0   0  10  10        0     0     0     0     0     0
 0  0  0   0  11  11   0   0   0  11  …     0     0     0     0     0     0
 0  0  0   0   0  12  12   0   0   0        0     0     0     0     0     0
 0  0  0   0   0   0  13  13   0   0

idx_red


21×5008 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
  1   1   2   4   1   1   2   3   4  …  4949  4950  4951  4957  4958  4959
  2   2   3   5   4   2   3   7   9     4957  4958  4959  4964  4965  4966
  5   3   7   9   5   5   6   8  10     4958  4959  4960  4965  4966  4967
  6   6   8  10   6   6   7  13  15     4964  4965  4966  4971  4972  4973
 35   7  37  40  10   7   8  14  48     4965  4966  4967  4972  4973  4974
 36  36  38  41  11  11  12  44  49  …  4966  4967  4968  4973  4974  4975
 37  37  39  42  41  12  13  45  50     4997  4998  4999  5002  5003  5004
 42  38  44  49  42  42  43  46  57     4998  4999  5000  5003  5004  5005
 43  43  45  50  43  43  44  53  58     5002  5003  5004  5006  5006  5007
 51  44  53  58  50  44  45  54  66     5003  5004  5005  5007  5007  5008
  0  52   0   0  51  51  52  62   0  …  5004  5005  5008     0  5008     0
  0   0   0   0  59  52  53   0   0     5006  5007     0     0     0     0
  0   0   0   0   0  60  61   0   0     5007  5008

idx_sum


1×5008 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 10  11  10  10  12  13  13  11  10  …  11  10  12  13  13  11  10  11  10

idx_cont


50×5008 CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}:
 51  43   2  58  41   7   7  54  66  …  5002  5005  5008  5007  4966  5005
  1   7  37  40  51   2   8  62  48     4965  5004  4960  5003  4973  4973
  2  37   7  50  11  51  53  54  49     4997  4958  4951  5006  4958  5008
 36   3  45  58   5  52   8  13  50     5002  4966  5000  4973  4966  4974
 51   2   7  41   4   6   8  13  50     4964  5007  5005  4957  5008  4967
  5   7   8   4  11  43  44  45  15  …  5004  4958  5008  5003  4965  4959
 51   2  44   5  50  42  43  13  48     5006  4958  4999  4965  4966  4966
 37  52  45  50   5   2  61  62  66     4957  5005  4959  4965  4974  4967
  1  44   7  10  11   1  53  45  48     4957  4967  5008  5007  4965  4974
 36  52  39  10  11  52  12  54  66     5004  4966  4968  4972  4974  4966
 42   7  37  10  11   6   8  46  10  …  4964  4967  4968  5007  5003  5007
 37  43   8   4  50   2  61  13  50     5003  5005  4959  5006  4972  4967
  2   3   7  41  51  51  61  14  10     4958  4999

# Force

## GPU

In [6]:
println("Force Parameter")
display(FusionAGG.Simulation.Parameter.Force)
println("Contractile Parameter")
display(FusionAGG.Simulation.Parameter.Contractile.fₚ')
println("Running CUDA kernel")
@time cu_force(FusionAGG,1.0)
println("Force")
display(FusionAGG.Simulation.Force.F)

Force Parameter


Cubic{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}(Float32[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01  …  0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01], Float32[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0  …  2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], Float32[3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0  …  3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0])

Contractile Parameter


1×5008 adjoint(::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}) with eltype Float32:
 0.001  0.001  0.001  0.001  0.001  …  0.001  0.001  0.001  0.001  0.001

Running CUDA kernel
  0.000119 seconds (48 allocations: 3.250 KiB)
Force


5008×3 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
  0.0          -0.000831988  -0.00021189
  0.000500419  -0.000305165  -0.000451029
  0.001        -1.49224f-5    0.000364654
  0.0          -0.000683325  -0.00021189
  0.0006991     0.000556282  -0.000212382
 -0.001         0.000164602   0.000364654
  0.0           0.000164602   0.000364654
 -0.0004915    -0.000141072  -0.00045103
  2.49331f-5   -0.000900311  -0.000213387
 -0.000498284   0.000708258   0.000364828
  0.000706996   0.000251302  -0.000211375
 -0.000498284   0.000708258   0.000364828
  0.000706996   0.000251302  -0.000211375
  ⋮                          
  0.000500444   0.00102452   -0.000364828
 -0.000498284  -0.000708258  -0.000364828
  0.001         0.000158756  -0.000364828
  0.000966149   9.98547f-5   -0.000364828
  0.000500419   0.000156502   0.00045103
 -8.91939f-6   -0.000725658   0.000452453
  0.001        -0.000164602  -0.000364654
 -0.001        -0.000164602  -0.000364654
  0.000509364  -0.00101494   -0.000364654

## CPU

In [28]:
size(fieldnames(Cubic),1)

3

In [55]:
X_CPU          = Matrix(FusionAGG.Position)
idx_red_CPU    = Matrix(FusionAGG.Simulation.Neighbor.idx_red)
idx_cont_CPU   = Matrix(FusionAGG.Simulation.Neighbor.idx_cont)
force_CPU      = zeros(size(X_CPU))

Param = vcat(
        [
            unique(
                getfield(FusionAGG.Simulation.Parameter.Force, fieldnames(Cubic)[i])
            ) for i=1:size(fieldnames(Cubic),1)
    ]...)
Param          = Cubic(Param...)
A_con          = unique(FusionAGG.Simulation.Parameter.Contractile.fₚ)[1]

t_knn = 1

1

In [56]:
function runCPU()
    for j = 1:size(idx_red_CPU,1)
        for i =1:size(X_CPU,1)
            if idx_red_CPU[j,i] != 0 && idx_red_CPU[j,i] != i
                dist = euclidean(X_CPU,i,idx_red_CPU[j,i])
                force_CPU[i,:] += force_func(Param,dist) .* (X_CPU[i,:] - X_CPU[idx_red_CPU[j,i],:]) ./ dist
            end
        end
    end

    for i =1:size(X_CPU,1)
        if idx_cont_CPU[t_knn,i] != i
            dist = euclidean(X_CPU,i,idx_cont_CPU[t_knn,i])
            force_CPU[i,:] += A_con .* (X_CPU[i,:] - X_CPU[idx_cont_CPU[t_knn,i],:]) ./ dist
        end
    end
end

runCPU()
display(force_CPU)

5008×3 Matrix{Float64}:
  0.0          -0.000831988  -0.00021189
  0.000500419  -0.000305165  -0.000451029
  0.001        -1.49224e-5    0.000364654
  0.0          -0.000683325  -0.00021189
  0.0006991     0.000556282  -0.000212382
 -0.001         0.000164602   0.000364654
  0.0           0.000164602   0.000364654
 -0.0004915    -0.000141072  -0.00045103
  2.49332e-5   -0.000900313  -0.000213385
 -0.000498284   0.000708257   0.000364829
  0.000706996   0.0002513    -0.000211373
 -0.000498284   0.000708257   0.000364829
  0.000706996   0.0002513    -0.000211373
  ⋮                          
  0.000500444   0.00102453   -0.000364829
 -0.000498284  -0.000708257  -0.000364829
  0.001         0.000158757  -0.000364829
  0.000966149   9.98561e-5   -0.000364829
  0.000500419   0.000156502   0.00045103
 -8.91939e-6   -0.000725658   0.000452453
  0.001        -0.000164602  -0.000364654
 -0.001        -0.000164602  -0.000364654
  0.000509364  -0.00101494   -0.000364654
 -0.001         1.49224e-5

## GPU vs CPU

In [62]:
using Test
@test Matrix(FusionAGG.Simulation.Force.F) ≈ force_CPU atol=0.01

[32m[1mTest Passed[22m[39m