# Benchmark

In [1]:
using BenchmarkTools

In [2]:
# Original parameter values from Crabb's test case.
# These globals are shared by every benchmark_* function in this notebook.
const box_size = 1f3;        # Float32 literal (1000.0f0); passed as the box-size argument
const dim = 3;               # first argument of random_data; presumably spatial dimensions
const dt = 0.01f0;           # Float32; presumably the integration time step -- TODO confirm
const num_part_types = 2;    # number of particle types requested from random_data
const nsteps = 1000;         # first positional argument of dynamics_sim! (step count)
const periodic = true;       # presumably enables periodic boundaries -- confirm in dynamics_sim!
const sinterval = 10;        # presumably the sampling/save interval in steps -- TODO confirm

### Serial version benchmark

In [3]:
include("src/serialmd.jl")

Main.Serialmd

In [4]:
"""
    benchmark_serial(part_num)

Benchmark `Serialmd.dynamics_sim!` on a randomly generated system of `part_num`
particles, using the notebook-level constants (`dim`, `num_part_types`, `box_size`,
`nsteps`, `sinterval`, `dt`, `periodic`).

Returns the `BenchmarkTools.Trial` produced by `@benchmark`.
"""
function benchmark_serial(part_num)
    # Generate the initial state of the system from the original test values.
    pos, vel, acc, masses, interactions, ptypes =
        Serialmd.random_data(dim, part_num, num_part_types, box_size)

    # `dynamics_sim!` is a mutating function, so each sample is given fresh copies of the
    # initial state in the (untimed) setup phase; otherwise every sample would continue
    # from wherever the previous one stopped. `evals = 1` guarantees exactly one call per
    # setup. NOTE(review): assumes pos/vel/acc are the only arguments mutated -- confirm.
    # Literal `false`/`true` need no `\$` interpolation.
    return @benchmark(
        Serialmd.dynamics_sim!($nsteps, $sinterval, false, $dt, p, v, a, $masses,
                               $interactions, $ptypes, $box_size, $periodic, true),
        setup = (p = deepcopy($pos); v = deepcopy($vel); a = deepcopy($acc)),
        evals = 1
    )
end

benchmark_serial (generic function with 1 method)

In [5]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [6]:
# Warm-up: build a small 10-particle system with the same constants (and therefore the
# same argument types, e.g. the Float32 box size) that the benchmarks pass, so the JIT
# compiles the specializations that are actually timed later.
@time pos, vel, acc, masses, interactions, ptypes =
    Serialmd.random_data(dim, 10, num_part_types, box_size);

  0.494463 seconds (1.18 M allocations: 60.165 MiB, 5.00% gc time)


In [7]:
# Warm-up: run a short simulation (3 steps, interval 1) on the small system once, so that
# JIT compilation of dynamics_sim! happens here rather than inside the benchmarks below.
@time Serialmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.379950 seconds (945.20 k allocations: 46.223 MiB, 3.00% gc time)


In [8]:
# Serial benchmark, 100 particles
benchmark_serial(100)

BenchmarkTools.Trial: 
  memory estimate:  118.59 KiB
  allocs estimate:  3
  --------------
  minimum time:     345.266 ms (0.00% GC)
  median time:      347.047 ms (0.00% GC)
  mean time:        347.277 ms (0.00% GC)
  maximum time:     351.135 ms (0.00% GC)
  --------------
  samples:          15
  evals/sample:     1

In [9]:
# Serial benchmark, 500 particles
benchmark_serial(500)

BenchmarkTools.Trial: 
  memory estimate:  592.02 KiB
  allocs estimate:  3
  --------------
  minimum time:     8.645 s (0.00% GC)
  median time:      8.645 s (0.00% GC)
  mean time:        8.645 s (0.00% GC)
  maximum time:     8.645 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [10]:
# Serial benchmark, 1000 particles
benchmark_serial(1000)

BenchmarkTools.Trial: 
  memory estimate:  1.16 MiB
  allocs estimate:  3
  --------------
  minimum time:     34.558 s (0.00% GC)
  median time:      34.558 s (0.00% GC)
  mean time:        34.558 s (0.00% GC)
  maximum time:     34.558 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

### CPU Multi Thread Version Benchmark

In [11]:
include("src/CPUMTmd.jl")

Main.CPUMTmd

In [12]:
# Print the thread count reported by CPUMTmd.nthreads() (the message reads
# "Available threads: N"). Presumably this is Threads.nthreads brought into the module.
println("Threads disponibili: ", CPUMTmd.nthreads())

Threads disponibili: 8


In [13]:
"""
    benchmark_CPUMT(part_num)

Benchmark the multi-threaded `CPUMTmd.dynamics_sim!` on a randomly generated system of
`part_num` particles, using the notebook-level constants (`dim`, `num_part_types`,
`box_size`, `nsteps`, `sinterval`, `dt`, `periodic`).

Returns the `BenchmarkTools.Trial` produced by `@benchmark`.
"""
function benchmark_CPUMT(part_num)
    # Generate the initial state of the system from the original test values.
    pos, vel, acc, masses, interactions, ptypes =
        CPUMTmd.random_data(dim, part_num, num_part_types, box_size)

    # The call is benchmarked directly: Base's `@sync` only waits for tasks spawned
    # lexically inside its own block, so wrapping a plain function call in it is a no-op.
    # NOTE(review): assumes dynamics_sim! returns only after its threaded work is done.
    #
    # `dynamics_sim!` is a mutating function, so each sample is given fresh copies of the
    # initial state in the (untimed) setup phase; `evals = 1` guarantees one call per
    # setup. NOTE(review): assumes pos/vel/acc are the only arguments mutated -- confirm.
    return @benchmark(
        CPUMTmd.dynamics_sim!($nsteps, $sinterval, false, $dt, p, v, a, $masses,
                              $interactions, $ptypes, $box_size, $periodic, true),
        setup = (p = deepcopy($pos); v = deepcopy($vel); a = deepcopy($acc)),
        evals = 1
    )
end

benchmark_CPUMT (generic function with 1 method)

In [14]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [15]:
# Warm-up: build a small 10-particle system with the same constants (and therefore the
# same argument types, e.g. the Float32 box size) that the benchmarks pass, so the JIT
# compiles the specializations that are actually timed later.
@time pos, vel, acc, masses, interactions, ptypes =
    CPUMTmd.random_data(dim, 10, num_part_types, box_size);

  0.119795 seconds (245.78 k allocations: 12.206 MiB)


In [16]:
# Warm-up: run a short simulation (3 steps, interval 1) on the small system once, so that
# JIT compilation of dynamics_sim! happens here rather than inside the benchmarks below.
@time CPUMTmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.390995 seconds (738.01 k allocations: 36.333 MiB, 2.23% gc time)


In [17]:
# CPU multi-thread benchmark, 100 particles
benchmark_CPUMT(100)

BenchmarkTools.Trial: 
  memory estimate:  45.59 MiB
  allocs estimate:  2320490
  --------------
  minimum time:     255.041 ms (0.00% GC)
  median time:      296.451 ms (0.00% GC)
  mean time:        300.756 ms (3.49% GC)
  maximum time:     367.790 ms (8.12% GC)
  --------------
  samples:          17
  evals/sample:     1

In [18]:
# CPU multi-thread benchmark, 500 particles
benchmark_CPUMT(500)

BenchmarkTools.Trial: 
  memory estimate:  180.30 MiB
  allocs estimate:  11118414
  --------------
  minimum time:     2.280 s (0.00% GC)
  median time:      2.348 s (2.83% GC)
  mean time:        2.327 s (1.92% GC)
  maximum time:     2.352 s (2.83% GC)
  --------------
  samples:          3
  evals/sample:     1

In [19]:
# CPU multi-thread benchmark, 1000 particles
benchmark_CPUMT(1000)

BenchmarkTools.Trial: 
  memory estimate:  348.82 MiB
  allocs estimate:  22124237
  --------------
  minimum time:     8.934 s (0.90% GC)
  median time:      8.934 s (0.90% GC)
  mean time:        8.934 s (0.90% GC)
  maximum time:     8.934 s (0.90% GC)
  --------------
  samples:          1
  evals/sample:     1

### CUDA Version Benchmark

In [20]:
include("src/CUDAmd.jl")

Main.CUDAmd

In [21]:
# Threads per block; passed as the first argument of CUDAmd.dynamics_sim!
const TPB = 256

256

In [22]:
"""
    benchmark_CUDA(cuThreads, part_num)

Benchmark the GPU `CUDAmd.dynamics_sim!` on a randomly generated system of `part_num`
particles, launching with `cuThreads` threads per block and using the notebook-level
constants (`dim`, `num_part_types`, `box_size`, `nsteps`, `sinterval`, `dt`, `periodic`).

Returns the `BenchmarkTools.Trial` produced by `@benchmark`.
"""
function benchmark_CUDA(cuThreads, part_num)
    # Generate the initial state of the system from the original test values.
    pos, vel, acc, masses, interactions, ptypes =
        CUDAmd.random_data(dim, part_num, num_part_types, box_size)

    # The threads-per-block value comes from the `cuThreads` argument, so callers can
    # benchmark different block sizes instead of always getting the global `TPB`.
    # `CuArrays.@sync` is applied so that pending GPU work is included in each sample.
    # NOTE(review): the same state arrays are reused by every sample, so each sample
    # continues from the state left by the previous one.
    return @benchmark begin
        CUDAmd.CuArrays.@sync begin
            CUDAmd.dynamics_sim!($cuThreads, $nsteps, $sinterval, false, $dt, $pos, $vel,
                                 $acc, $masses, $interactions, $ptypes, $box_size,
                                 $periodic, true)
        end
    end
end

benchmark_CUDA (generic function with 1 method)

In [23]:
## PRE-BENCHMARK ## (JIT compilation and caching)

In [24]:
# Warm-up: build a small 10-particle system with the same constants (and therefore the
# same argument types, e.g. the Float32 box size) that the benchmarks pass, so the JIT
# compiles the specializations that are actually timed later.
@time pos, vel, acc, masses, interactions, ptypes =
    CUDAmd.random_data(dim, 10, num_part_types, box_size);

  1.373348 seconds (1.74 M allocations: 87.043 MiB, 2.23% gc time)


In [25]:
# Warm-up: run a short simulation (3 steps, interval 1) with TPB threads per block once,
# so that JIT compilation of dynamics_sim! (host code and, presumably, the GPU kernels)
# happens here rather than inside the benchmarks below.
@time CUDAmd.dynamics_sim!(TPB, 3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

 10.797023 seconds (14.29 M allocations: 714.107 MiB, 3.33% gc time)


In [26]:
# CUDA benchmark, 100 particles, TPB threads per block
benchmark_CUDA(TPB, 100)

BenchmarkTools.Trial: 
  memory estimate:  4.49 MiB
  allocs estimate:  135026
  --------------
  minimum time:     648.014 ms (0.00% GC)
  median time:      648.051 ms (0.00% GC)
  mean time:        648.569 ms (0.00% GC)
  maximum time:     652.127 ms (0.00% GC)
  --------------
  samples:          8
  evals/sample:     1

In [27]:
# CUDA benchmark, 500 particles, TPB threads per block
benchmark_CUDA(TPB, 500)

BenchmarkTools.Trial: 
  memory estimate:  4.50 MiB
  allocs estimate:  136026
  --------------
  minimum time:     3.363 s (0.00% GC)
  median time:      3.363 s (0.00% GC)
  mean time:        3.363 s (0.00% GC)
  maximum time:     3.364 s (0.00% GC)
  --------------
  samples:          2
  evals/sample:     1

In [28]:
# CUDA benchmark, 1000 particles, TPB threads per block
benchmark_CUDA(TPB, 1000)

BenchmarkTools.Trial: 
  memory estimate:  4.56 MiB
  allocs estimate:  140026
  --------------
  minimum time:     6.703 s (0.00% GC)
  median time:      6.703 s (0.00% GC)
  mean time:        6.703 s (0.00% GC)
  maximum time:     6.703 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1