# Benchmark

In [1]:
using BenchmarkTools

In [2]:
# Original values from Crabb's test.
# These constants are shared by every benchmark_* helper in this notebook.
const box_size = 1f3;        # Float32 literal (1000.0f0); presumably the box side length
const dim = 3;               # passed to random_data as its first argument
const dt = 0.01f0;           # Float32 literal; presumably the integration time step
const num_part_types = 2;
const nsteps = 1000;
const periodic = true;       # presumably enables periodic boundaries — confirm in src/
const sinterval = 10;        # presumably the save/sampling interval in steps — confirm

### Serial version benchmark

In [3]:
include("src/serialmd.jl")

Main.Serialmd

In [4]:
# Benchmark `Serialmd.dynamics_sim!` on a random system of `part_num` particles.
#
# The initial state is generated from the notebook constants `dim`,
# `num_part_types` and `box_size`; the simulation is run with `nsteps`,
# `sinterval`, `dt` and `periodic`. Returns the `BenchmarkTools.Trial`.
function benchmark_serial(part_num)
    # Generate the initial state of the system from the original test values
    pos, vel, acc, masses, interactions, ptypes =
        Serialmd.random_data(dim, part_num, num_part_types, box_size)
    # `dynamics_sim!` is expected to update its state arrays in place (`!` convention —
    # confirm in Serialmd). Reusing the same arrays would make every sample after the
    # first start from the state left behind by the previous one, so each sample gets
    # fresh copies. `setup` is not timed; `evals=1` guarantees one setup per evaluation.
    # `deepcopy` is used because the element layout of the arrays is not visible here.
    return @benchmark Serialmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt, p, v, a, $masses,
        $interactions, $ptypes, $box_size, $periodic, true
    ) setup=(p = deepcopy($pos); v = deepcopy($vel); a = deepcopy($acc)) evals=1
end

benchmark_serial (generic function with 1 method)

In [5]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [6]:
# Warm-up call: triggers JIT compilation of Serialmd.random_data and leaves a small
# system in notebook-level variables, which the next cell passes to dynamics_sim!.
# NOTE(review): the box size here is the Int literal 10, whereas the benchmarks pass the
# Float32 constant box_size — this may compile a different specialization; confirm.
@time pos, vel, acc, masses, interactions, ptypes = Serialmd.random_data(3, 10, 2, 10);

  0.238353 seconds (1.17 M allocations: 59.821 MiB, 5.22% gc time)


In [7]:
# Warm-up call: a very short run (3 and 1 in the argument slots where the benchmark
# passes nsteps and sinterval) so that dynamics_sim! is compiled before it is measured.
@time Serialmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.192054 seconds (925.98 k allocations: 45.209 MiB, 3.22% gc time)


In [8]:
# Serial test, 100 particles
benchmark_serial(100)

BenchmarkTools.Trial: 
  memory estimate:  118.59 KiB
  allocs estimate:  3
  --------------
  minimum time:     195.991 ms (0.00% GC)
  median time:      197.108 ms (0.00% GC)
  mean time:        199.176 ms (0.00% GC)
  maximum time:     209.105 ms (0.00% GC)
  --------------
  samples:          26
  evals/sample:     1

In [28]:
# Serial test, 500 particles
benchmark_serial(500)

BenchmarkTools.Trial: 
  memory estimate:  592.02 KiB
  allocs estimate:  3
  --------------
  minimum time:     5.087 s (0.00% GC)
  median time:      5.087 s (0.00% GC)
  mean time:        5.087 s (0.00% GC)
  maximum time:     5.087 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [29]:
# Serial test, 1000 particles
benchmark_serial(1000)

BenchmarkTools.Trial: 
  memory estimate:  1.16 MiB
  allocs estimate:  3
  --------------
  minimum time:     20.242 s (0.00% GC)
  median time:      20.242 s (0.00% GC)
  mean time:        20.242 s (0.00% GC)
  maximum time:     20.242 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

### CPU Multi Thread Version Benchmark

In [11]:
include("src/CPUMTmd.jl")

Main.CPUMTmd

In [12]:
# Print the number of threads available to the multi-threaded version.
# NOTE(review): presumably CPUMTmd re-exposes Base.Threads.nthreads — confirm in
# src/CPUMTmd.jl.
println("Threads disponibili: ", CPUMTmd.nthreads())

Threads disponibili: 12


In [13]:
# Benchmark `CPUMTmd.dynamics_sim!` on a random system of `part_num` particles.
#
# The initial state is generated from the notebook constants `dim`,
# `num_part_types` and `box_size`; the simulation is run with `nsteps`,
# `sinterval`, `dt` and `periodic`. Returns the `BenchmarkTools.Trial`.
function benchmark_CPUMT(part_num)
    # Generate the initial state of the system from the original test values
    pos, vel, acc, masses, interactions, ptypes =
        CPUMTmd.random_data(dim, part_num, num_part_types, box_size)
    # No `@sync` wrapper: `@sync` only waits for `@async`/`@spawn` uses that are
    # lexically inside its block, and there are none here, so it had no effect on a
    # plain function call.
    #
    # `dynamics_sim!` is expected to update its state arrays in place (`!` convention —
    # confirm in CPUMTmd). Reusing the same arrays would make every sample after the
    # first start from the state left behind by the previous one, so each sample gets
    # fresh copies. `setup` is not timed; `evals=1` guarantees one setup per evaluation.
    # `deepcopy` is used because the element layout of the arrays is not visible here.
    return @benchmark CPUMTmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt, p, v, a, $masses,
        $interactions, $ptypes, $box_size, $periodic, true
    ) setup=(p = deepcopy($pos); v = deepcopy($vel); a = deepcopy($acc)) evals=1
end

benchmark_CPUMT (generic function with 1 method)

In [14]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [15]:
# Warm-up call: triggers JIT compilation of CPUMTmd.random_data and rebinds the
# notebook-level state variables that the next cell passes to dynamics_sim!.
# NOTE(review): the box size here is the Int literal 10, whereas the benchmarks pass the
# Float32 constant box_size — this may compile a different specialization; confirm.
@time pos, vel, acc, masses, interactions, ptypes = CPUMTmd.random_data(3, 10, 2, 10);

  0.065973 seconds (245.76 k allocations: 12.205 MiB, 7.90% gc time)


In [16]:
# Warm-up call: a very short run (3 and 1 in the argument slots where the benchmark
# passes nsteps and sinterval) so that dynamics_sim! is compiled before it is measured.
@time CPUMTmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.169503 seconds (571.03 k allocations: 27.820 MiB)


In [17]:
# CPU-MT test, 100 particles
benchmark_CPUMT(100)

BenchmarkTools.Trial: 
  memory estimate:  67.71 MiB
  allocs estimate:  3277842
  --------------
  minimum time:     192.452 ms (0.00% GC)
  median time:      219.649 ms (9.86% GC)
  mean time:        228.406 ms (5.28% GC)
  maximum time:     348.919 ms (6.58% GC)
  --------------
  samples:          22
  evals/sample:     1

In [18]:
# CPU-MT test, 500 particles
benchmark_CPUMT(500)

BenchmarkTools.Trial: 
  memory estimate:  257.34 MiB
  allocs estimate:  15674967
  --------------
  minimum time:     1.338 s (5.65% GC)
  median time:      1.505 s (5.04% GC)
  mean time:        1.469 s (3.93% GC)
  maximum time:     1.531 s (4.96% GC)
  --------------
  samples:          4
  evals/sample:     1

In [19]:
# CPU-MT test, 1000 particles
benchmark_CPUMT(1000)

BenchmarkTools.Trial: 
  memory estimate:  494.45 MiB
  allocs estimate:  31176372
  --------------
  minimum time:     4.475 s (2.16% GC)
  median time:      4.608 s (2.13% GC)
  mean time:        4.608 s (2.13% GC)
  maximum time:     4.741 s (2.10% GC)
  --------------
  samples:          2
  evals/sample:     1

### CUDA Version Benchmark

In [9]:
include("src/CUDAmd.jl")

└ @ CUDAnative C:\Users\enric\.julia\packages\CUDAnative\hfulr\src\CUDAnative.jl:153


Main.CUDAmd

In [10]:
# Benchmark `CUDAmd.dynamics_sim!` on a random system of `part_num` particles,
# forwarding `cuThreads` as the first argument of the simulation call.
#
# The timed expression is wrapped in `CUDAmd.CuArrays.@sync`. The system is built
# from the notebook constants `dim`, `num_part_types` and `box_size`.
# Returns the `BenchmarkTools.Trial`.
#
# NOTE(review): every sample receives the same state objects; if `dynamics_sim!`
# updates them in place, later samples start from an evolved state — confirm whether
# that is intended.
function benchmark_CUDA(cuThreads, part_num)
    # Build the initial state of the system from the original test values
    positions, velocities, accelerations, particle_masses, pair_interactions, particle_types =
        CUDAmd.random_data(dim, part_num, num_part_types, box_size)
    # Measure the simulation call together with the synchronisation that follows it
    return @benchmark CUDAmd.CuArrays.@sync CUDAmd.dynamics_sim!(
        $cuThreads, $nsteps, $sinterval, false, $dt,
        $positions, $velocities, $accelerations, $particle_masses,
        $pair_interactions, $particle_types, $box_size, $periodic, true
    )
end

benchmark_CUDA (generic function with 1 method)

In [11]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [12]:
# Warm-up call: triggers JIT compilation of CUDAmd.random_data and rebinds the
# notebook-level state variables that the next cell passes to dynamics_sim!.
# NOTE(review): the box size here is the Int literal 10, whereas the benchmarks pass the
# Float32 constant box_size — this may compile a different specialization; confirm.
@time pos, vel, acc, masses, interactions, ptypes = CUDAmd.random_data(3, 10, 2, 10);

  0.981422 seconds (2.66 M allocations: 134.511 MiB, 2.96% gc time)


In [13]:
# Warm-up call: a very short run (512 in the cuThreads slot, 3 and 1 where the benchmark
# passes nsteps and sinterval) so that dynamics_sim! is compiled before it is measured.
@time CUDAmd.dynamics_sim!(512, 3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  5.642785 seconds (14.57 M allocations: 728.063 MiB, 3.76% gc time)


In [14]:
# CUDA test, 100 particles (cuThreads = 512)
benchmark_CUDA(512, 100)

BenchmarkTools.Trial: 
  memory estimate:  4.52 MiB
  allocs estimate:  137026
  --------------
  minimum time:     202.495 ms (0.00% GC)
  median time:      203.749 ms (0.00% GC)
  mean time:        203.851 ms (0.11% GC)
  maximum time:     205.990 ms (0.00% GC)
  --------------
  samples:          25
  evals/sample:     1

In [15]:
# CUDA test, 500 particles (cuThreads = 512)
benchmark_CUDA(512, 500)

BenchmarkTools.Trial: 
  memory estimate:  4.53 MiB
  allocs estimate:  138026
  --------------
  minimum time:     1.292 s (0.00% GC)
  median time:      1.315 s (0.00% GC)
  mean time:        1.320 s (0.00% GC)
  maximum time:     1.358 s (0.00% GC)
  --------------
  samples:          4
  evals/sample:     1

In [16]:
# CUDA test, 1000 particles (cuThreads = 512)
benchmark_CUDA(512, 1000)

BenchmarkTools.Trial: 
  memory estimate:  4.59 MiB
  allocs estimate:  142026
  --------------
  minimum time:     2.798 s (0.00% GC)
  median time:      2.813 s (0.00% GC)
  mean time:        2.813 s (0.00% GC)
  maximum time:     2.828 s (0.00% GC)
  --------------
  samples:          2
  evals/sample:     1