# Benchmark

In [1]:
using BenchmarkTools

In [2]:
# Original values from Crabb's test.
# Inputs forwarded to `random_data` by the benchmark helpers in this notebook:
const dim            = 3
const num_part_types = 2
const box_size       = 1.0f3    # Float32 literal (1000); also forwarded to `dynamics_sim!`
# Inputs forwarded to `dynamics_sim!` by the benchmark helpers in this notebook:
const nsteps         = 1000
const sinterval      = 10
const dt             = 0.01f0   # Float32 literal
# The trailing `;` keeps the notebook cell from echoing the last value.
const periodic       = true;

### Serial version benchmark

In [3]:
include("src/serialmd.jl")

Main.Serialmd

In [4]:
"""
    benchmark_serial(part_num)

Benchmark `Serialmd.dynamics_sim!` on a freshly generated system of `part_num` particles.

The initial state comes from `Serialmd.random_data`, using the notebook-level constants
`dim`, `num_part_types` and `box_size`. The simulation call is then measured with
`@benchmark`, using `nsteps`, `sinterval`, `dt`, `box_size` and `periodic`.

Returns the value produced by `@benchmark` (displayed as a `BenchmarkTools.Trial`).

NOTE(review): the same state arrays are interpolated into every sample. If
`dynamics_sim!` updates them in place (as the `!` suffix suggests), later samples start
from the state left by earlier ones. Confirm this is acceptable for the measurement.
"""
function benchmark_serial(part_num)
    # Build the initial system state from the reference parameters.
    state = Serialmd.random_data(dim, part_num, num_part_types, box_size)
    positions, velocities, accelerations, masses, interactions, ptypes = state

    # `$` interpolation hands the values to the benchmark as pre-evaluated arguments.
    trial = @benchmark Serialmd.dynamics_sim!(
        $nsteps, $sinterval, $false, $dt,
        $positions, $velocities, $accelerations, $masses,
        $interactions, $ptypes, $box_size, $periodic, $true,
    )
    return trial
end

benchmark_serial (generic function with 1 method)

In [5]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [6]:
# Warm-up call to `Serialmd.random_data` with small arguments (3, 10, 2, 10), matching the
# (dim, part_num, num_part_types, box_size) positions used by `benchmark_serial`.
# The returned state is reused by the warm-up simulation call that follows.
@time pos, vel, acc, masses, interactions, ptypes = Serialmd.random_data(3, 10, 2, 10);

  0.487893 seconds (1.18 M allocations: 60.191 MiB, 4.91% gc time)


In [7]:
# Warm-up call to `Serialmd.dynamics_sim!` with nsteps = 3 and sinterval = 1 on the small
# system generated above, so that first-call compilation happens before the benchmarks.
@time Serialmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.424754 seconds (945.18 k allocations: 46.241 MiB, 2.93% gc time)


In [8]:
# Serial test, 100 particles
benchmark_serial(100)

BenchmarkTools.Trial: 
  memory estimate:  118.59 KiB
  allocs estimate:  3
  --------------
  minimum time:     345.141 ms (0.00% GC)
  median time:      346.171 ms (0.00% GC)
  mean time:        346.541 ms (0.00% GC)
  maximum time:     349.829 ms (0.00% GC)
  --------------
  samples:          15
  evals/sample:     1

In [9]:
# Serial test, 500 particles
benchmark_serial(500)

BenchmarkTools.Trial: 
  memory estimate:  592.02 KiB
  allocs estimate:  3
  --------------
  minimum time:     8.656 s (0.00% GC)
  median time:      8.656 s (0.00% GC)
  mean time:        8.656 s (0.00% GC)
  maximum time:     8.656 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [10]:
# Serial test, 1000 particles
benchmark_serial(1000)

BenchmarkTools.Trial: 
  memory estimate:  1.16 MiB
  allocs estimate:  3
  --------------
  minimum time:     34.719 s (0.00% GC)
  median time:      34.719 s (0.00% GC)
  mean time:        34.719 s (0.00% GC)
  maximum time:     34.719 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [11]:
# Serial test, 1200 particles
benchmark_serial(1200)

BenchmarkTools.Trial: 
  memory estimate:  1.39 MiB
  allocs estimate:  3
  --------------
  minimum time:     49.810 s (0.00% GC)
  median time:      49.810 s (0.00% GC)
  mean time:        49.810 s (0.00% GC)
  maximum time:     49.810 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [12]:
# Serial test, 1500 particles
benchmark_serial(1500)

BenchmarkTools.Trial: 
  memory estimate:  1.73 MiB
  allocs estimate:  4
  --------------
  minimum time:     77.771 s (0.00% GC)
  median time:      77.771 s (0.00% GC)
  mean time:        77.771 s (0.00% GC)
  maximum time:     77.771 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

### CPU Multi Thread Version Benchmark

In [13]:
include("src/CPUMTmd.jl")

Main.CPUMTmd

In [14]:
# Print the thread count reported by `CPUMTmd.nthreads()`
# (the Italian label reads "Available threads").
println("Threads disponibili: ", CPUMTmd.nthreads())

Threads disponibili: 8


In [15]:
"""
    benchmark_CPUMT(part_num)

Benchmark `CPUMTmd.dynamics_sim!` on a freshly generated system of `part_num` particles.

The initial state comes from `CPUMTmd.random_data`, using the notebook-level constants
`dim`, `num_part_types` and `box_size`. The simulation call is wrapped in `@sync` and
measured with `@benchmark`.

Returns the value produced by `@benchmark` (displayed as a `BenchmarkTools.Trial`).

NOTE(review): the same state arrays are interpolated into every sample. If
`dynamics_sim!` updates them in place (as the `!` suffix suggests), later samples start
from the state left by earlier ones. Confirm this is acceptable for the measurement.
"""
function benchmark_CPUMT(part_num)
    # Build the initial system state from the reference parameters.
    init = CPUMTmd.random_data(dim, part_num, num_part_types, box_size)
    r, v, a, m, inter, kinds = init

    # `@sync` is kept around the call exactly as in the original measurement.
    trial = @benchmark @sync CPUMTmd.dynamics_sim!(
        $nsteps, $sinterval, $false, $dt,
        $r, $v, $a, $m,
        $inter, $kinds, $box_size, $periodic, $true,
    )
    return trial
end

benchmark_CPUMT (generic function with 1 method)

In [16]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [17]:
# Warm-up call to `CPUMTmd.random_data` with small arguments (3, 10, 2, 10), matching the
# (dim, part_num, num_part_types, box_size) positions used by `benchmark_CPUMT`.
# The returned state is reused by the warm-up simulation call that follows.
@time pos, vel, acc, masses, interactions, ptypes = CPUMTmd.random_data(3, 10, 2, 10);

  0.113945 seconds (245.78 k allocations: 12.206 MiB)


In [18]:
# Warm-up call to `CPUMTmd.dynamics_sim!` with nsteps = 3 and sinterval = 1 on the small
# system generated above, so that first-call compilation happens before the benchmarks.
@time CPUMTmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.382960 seconds (738.01 k allocations: 36.333 MiB, 2.25% gc time)


In [19]:
# CPU-MT test, 100 particles
benchmark_CPUMT(100)

BenchmarkTools.Trial: 
  memory estimate:  45.60 MiB
  allocs estimate:  2321113
  --------------
  minimum time:     271.049 ms (0.00% GC)
  median time:      301.831 ms (0.00% GC)
  mean time:        313.836 ms (2.88% GC)
  maximum time:     367.116 ms (0.00% GC)
  --------------
  samples:          16
  evals/sample:     1

In [20]:
# CPU-MT test, 500 particles
benchmark_CPUMT(500)

BenchmarkTools.Trial: 
  memory estimate:  180.30 MiB
  allocs estimate:  11118329
  --------------
  minimum time:     2.288 s (0.00% GC)
  median time:      2.341 s (2.76% GC)
  mean time:        2.353 s (1.85% GC)
  maximum time:     2.429 s (2.66% GC)
  --------------
  samples:          3
  evals/sample:     1

In [21]:
# CPU-MT test, 1000 particles
benchmark_CPUMT(1000)

BenchmarkTools.Trial: 
  memory estimate:  348.77 MiB
  allocs estimate:  22121327
  --------------
  minimum time:     9.368 s (0.85% GC)
  median time:      9.368 s (0.85% GC)
  mean time:        9.368 s (0.85% GC)
  maximum time:     9.368 s (0.85% GC)
  --------------
  samples:          1
  evals/sample:     1

In [22]:
# CPU-MT test, 1200 particles
benchmark_CPUMT(1200)

BenchmarkTools.Trial: 
  memory estimate:  416.16 MiB
  allocs estimate:  26522088
  --------------
  minimum time:     12.218 s (0.67% GC)
  median time:      12.218 s (0.67% GC)
  mean time:        12.218 s (0.67% GC)
  maximum time:     12.218 s (0.67% GC)
  --------------
  samples:          1
  evals/sample:     1

In [23]:
# CPU-MT test, 1500 particles
benchmark_CPUMT(1500)

BenchmarkTools.Trial: 
  memory estimate:  517.18 MiB
  allocs estimate:  33120260
  --------------
  minimum time:     18.874 s (0.44% GC)
  median time:      18.874 s (0.44% GC)
  mean time:        18.874 s (0.44% GC)
  maximum time:     18.874 s (0.44% GC)
  --------------
  samples:          1
  evals/sample:     1

### CUDA Version Benchmark

In [24]:
include("src/CUDAmd.jl")

Main.CUDAmd

In [25]:
# Threads per block
const TPB = 256

256

In [26]:
"""
    benchmark_CUDA(cuThreads, part_num)

Benchmark `CUDAmd.dynamics_sim!` on a freshly generated system of `part_num` particles.

# Arguments
- `cuThreads`: value forwarded as the first argument of `CUDAmd.dynamics_sim!` (the
  notebook passes `TPB`, its threads-per-block constant). Earlier versions ignored this
  parameter and always read the global `TPB`; it is now honoured.
- `part_num`: number of particles passed to `CUDAmd.random_data`.

The initial state comes from `CUDAmd.random_data`, using the notebook-level constants
`dim`, `num_part_types` and `box_size`. The simulation call is wrapped in
`CUDAmd.CuArrays.@sync` and measured with `@benchmark`.

Returns the value produced by `@benchmark` (displayed as a `BenchmarkTools.Trial`).

NOTE(review): the same state arrays are interpolated into every sample. If
`dynamics_sim!` updates them in place (as the `!` suffix suggests), later samples start
from the state left by earlier ones. Confirm this is acceptable for the measurement.
"""
function benchmark_CUDA(cuThreads, part_num)
    # Generate the initial system state from the reference parameters.
    pos, vel, acc, masses, interactions, ptypes =
        CUDAmd.random_data(dim, part_num, num_part_types, box_size)

    # Benchmark using the caller-supplied `cuThreads` rather than the global `TPB`.
    return @benchmark begin
        CUDAmd.CuArrays.@sync begin
            CUDAmd.dynamics_sim!($cuThreads, $nsteps, $sinterval, $false, $dt, $pos, $vel,
                                 $acc, $masses, $interactions, $ptypes, $box_size,
                                 $periodic, $true)
        end
    end
end

benchmark_CUDA (generic function with 1 method)

In [27]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [28]:
# Warm-up call to `CUDAmd.random_data` with small arguments (3, 10, 2, 10), matching the
# (dim, part_num, num_part_types, box_size) positions used by `benchmark_CUDA`.
# The returned state is reused by the warm-up simulation call that follows.
@time pos, vel, acc, masses, interactions, ptypes = CUDAmd.random_data(3, 10, 2, 10);

  1.435257 seconds (1.74 M allocations: 87.081 MiB, 2.32% gc time)


In [29]:
# Warm-up call to `CUDAmd.dynamics_sim!` with `TPB`, nsteps = 3 and sinterval = 1 on the
# small system generated above, so that first-call compilation happens before the benchmarks.
@time CUDAmd.dynamics_sim!(TPB, 3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

 10.871836 seconds (14.29 M allocations: 713.670 MiB, 3.24% gc time)


In [30]:
# CUDA test, 100 particles
benchmark_CUDA(TPB, 100)

BenchmarkTools.Trial: 
  memory estimate:  4.49 MiB
  allocs estimate:  135026
  --------------
  minimum time:     647.481 ms (0.00% GC)
  median time:      647.649 ms (0.00% GC)
  mean time:        647.754 ms (0.00% GC)
  maximum time:     648.100 ms (0.00% GC)
  --------------
  samples:          8
  evals/sample:     1

In [31]:
# CUDA test, 500 particles
benchmark_CUDA(TPB, 500)

BenchmarkTools.Trial: 
  memory estimate:  4.50 MiB
  allocs estimate:  136026
  --------------
  minimum time:     3.377 s (0.00% GC)
  median time:      3.377 s (0.00% GC)
  mean time:        3.377 s (0.00% GC)
  maximum time:     3.377 s (0.00% GC)
  --------------
  samples:          2
  evals/sample:     1

In [32]:
# CUDA test, 1000 particles
benchmark_CUDA(TPB, 1000)

BenchmarkTools.Trial: 
  memory estimate:  4.56 MiB
  allocs estimate:  140026
  --------------
  minimum time:     6.727 s (0.00% GC)
  median time:      6.727 s (0.00% GC)
  mean time:        6.727 s (0.00% GC)
  maximum time:     6.727 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [33]:
# CUDA test, 1200 particles
benchmark_CUDA(TPB, 1200)

BenchmarkTools.Trial: 
  memory estimate:  4.56 MiB
  allocs estimate:  140026
  --------------
  minimum time:     8.045 s (0.00% GC)
  median time:      8.045 s (0.00% GC)
  mean time:        8.045 s (0.00% GC)
  maximum time:     8.045 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [34]:
# CUDA test, 1500 particles
benchmark_CUDA(TPB, 1500)

BenchmarkTools.Trial: 
  memory estimate:  4.56 MiB
  allocs estimate:  140026
  --------------
  minimum time:     10.034 s (0.00% GC)
  median time:      10.034 s (0.00% GC)
  mean time:        10.034 s (0.00% GC)
  maximum time:     10.034 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1