# Benchmark

In [1]:
using BenchmarkTools

In [2]:
# Original values from Crabb's test.
# These constants are shared by all the benchmark helpers in this notebook.
# Box size, passed to both random_data and dynamics_sim!.
const box_size = 1e3;
# First argument of random_data; presumably the spatial dimensionality — TODO confirm.
const dim = 3;
# Passed to dynamics_sim!; presumably the integration time step — TODO confirm units.
const dt = 0.01;
# Third argument of random_data; presumably the number of distinct particle types — TODO confirm.
const num_part_types = 2;
# First argument of dynamics_sim!; presumably the number of simulation steps — TODO confirm.
const nsteps = 1000;
# Passed to dynamics_sim!; presumably toggles periodic boundary conditions — TODO confirm.
const periodic = true;
# Second argument of dynamics_sim!; presumably a save/sampling interval in steps — TODO confirm.
const sinterval = 10;

### Serial version benchmark

In [3]:
include("src/serialmd.jl")

Main.Serialmd

In [4]:
# benchmark_serial(part_num)
#
# Benchmark Serialmd.dynamics_sim! on a randomly generated system of `part_num`
# particles and return the value produced by @benchmark.
#
# The initial state comes from Serialmd.random_data, called with the notebook-level
# constants dim, num_part_types and box_size; the run itself uses nsteps, sinterval,
# dt and periodic.
#
# NOTE(review): the same state objects are interpolated into every sample. Since
# dynamics_sim! is a `!` function it presumably mutates them, so later samples would
# start from an already-evolved state — confirm whether that matters for these timings.
function benchmark_serial(part_num)
    # Build the initial system state from the original test values.
    state = Serialmd.random_data(dim, part_num, num_part_types, box_size)
    positions, velocities, accelerations, masses, interactions, ptypes = state

    # Time the full simulation; every argument is `$`-interpolated into the benchmark.
    trial = @benchmark Serialmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt,
        $positions, $velocities, $accelerations, $masses, $interactions, $ptypes,
        $box_size, $periodic, true,
    )
    return trial
end

benchmark_serial (generic function with 1 method)

In [5]:
## PRE-BENCHMARK ##

In [6]:
# Warm-up call on a tiny system. Going by the argument order used in benchmark_serial,
# the literals are dim = 3, part_num = 10, num_part_types = 2, box_size = 10.
# NOTE(review): presumably run so that compilation happens before the real benchmark — confirm.
@time pos, vel, acc, masses, interactions, ptypes = Serialmd.random_data(3, 10, 2, 10);

  0.254059 seconds (1.17 M allocations: 59.378 MiB, 5.38% gc time)


In [7]:
# Warm-up run of the serial simulation (3 steps, sinterval 1) on the tiny system from the previous cell.
# NOTE(review): that system was generated with box size 10, while the global box_size (1e3)
# is passed here — confirm the mismatch is intentional for a warm-up.
@time Serialmd.dynamics_sim!(3, 1, false, 0.01, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.092067 seconds (219.25 k allocations: 11.444 MiB)


In [8]:
# Serial test, 100 particles
benchmark_serial(100)

BenchmarkTools.Trial: 
  memory estimate:  212.34 KiB
  allocs estimate:  1003
  --------------
  minimum time:     367.364 ms (0.00% GC)
  median time:      371.590 ms (0.00% GC)
  mean time:        371.890 ms (0.00% GC)
  maximum time:     379.829 ms (0.00% GC)
  --------------
  samples:          14
  evals/sample:     1

In [9]:
# Serial test, 500 particles
#benchmark_serial(500)

In [10]:
# Serial test, 1000 particles
#benchmark_serial(1000)

### CPU Multi Thread Version Benchmark

In [11]:
include("src/CPUMTmd.jl")

Main.CPUMTmd

In [12]:
# Print the number of threads reported by CPUMTmd.nthreads()
# (presumably Threads.nthreads made available inside the module — confirm in src/CPUMTmd.jl).
println("Threads disponibili: ", CPUMTmd.nthreads())

Threads disponibili: 12


In [13]:
# benchmark_CPUMT(part_num)
#
# Benchmark CPUMTmd.dynamics_sim! on a randomly generated system of `part_num`
# particles and return the value produced by @benchmark.
#
# The initial state comes from CPUMTmd.random_data, called with the notebook-level
# constants dim, num_part_types and box_size; the run itself uses nsteps, sinterval,
# dt and periodic.
#
# NOTE(review): the same state objects are interpolated into every sample. Since
# dynamics_sim! is a `!` function it presumably mutates them, so later samples would
# start from an already-evolved state — confirm whether that matters for these timings.
function benchmark_CPUMT(part_num)
    # Build the initial system state from the original test values.
    state = CPUMTmd.random_data(dim, part_num, num_part_types, box_size)
    positions, velocities, accelerations, masses, interactions, ptypes = state

    # Time the full simulation; every argument is `$`-interpolated into the benchmark.
    trial = @benchmark CPUMTmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt,
        $positions, $velocities, $accelerations, $masses, $interactions, $ptypes,
        $box_size, $periodic, true,
    )
    return trial
end

benchmark_CPUMT (generic function with 1 method)

In [14]:
## PRE-BENCHMARK ##

In [15]:
# Warm-up call on a tiny system. Going by the argument order used in benchmark_CPUMT,
# the literals are dim = 3, part_num = 10, num_part_types = 2, box_size = 10.
# NOTE(review): presumably run so that compilation happens before the real benchmark — confirm.
@time pos, vel, acc, masses, interactions, ptypes = CPUMTmd.random_data(3, 10, 2, 10);

  0.061209 seconds (248.59 k allocations: 12.324 MiB)


In [16]:
# Warm-up run of the multi-threaded simulation (3 steps, sinterval 1) on the tiny system from the previous cell.
# NOTE(review): that system was generated with box size 10, while the global box_size (1e3)
# is passed here — confirm the mismatch is intentional for a warm-up.
@time CPUMTmd.dynamics_sim!(3, 1, false, 0.01, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.237768 seconds (1.16 M allocations: 59.013 MiB, 2.16% gc time)


In [17]:
# CPU-MT test, 100 particles
benchmark_CPUMT(100)

BenchmarkTools.Trial: 
  memory estimate:  3.61 GiB
  allocs estimate:  240980949
  --------------
  minimum time:     3.541 s (30.15% GC)
  median time:      3.566 s (29.82% GC)
  mean time:        3.566 s (29.82% GC)
  maximum time:     3.591 s (29.49% GC)
  --------------
  samples:          2
  evals/sample:     1

In [18]:
# CPU-MT test 500 particelle
#benchmark_CPUMT(500)

In [19]:
# CPU-MT test 1000 particelle
#benchmark_CPUMT(1000)

### CUDA Version Benchmark

In [20]:
include("src/CUDAmd.jl")

└ @ CUDAnative C:\Users\enric\.julia\packages\CUDAnative\hfulr\src\CUDAnative.jl:153


Main.CUDAmd

In [21]:
# benchmark_CUDA(part_num)
#
# Benchmark CUDAmd.dynamics_sim! on a randomly generated system of `part_num`
# particles and return the value produced by @benchmark.
#
# The initial state comes from CUDAmd.random_data, called with the notebook-level
# constants dim, num_part_types and box_size; the run itself uses nsteps, sinterval,
# dt and periodic.
#
# NOTE(review): the same state objects are interpolated into every sample. Since
# dynamics_sim! is a `!` function it presumably mutates them, so later samples would
# start from an already-evolved state — confirm whether that matters for these timings.
# NOTE(review): if dynamics_sim! launches GPU work without synchronizing before it
# returns, the measured time may not include kernel completion — confirm in src/CUDAmd.jl.
function benchmark_CUDA(part_num)
    # Build the initial system state from the original test values.
    state = CUDAmd.random_data(dim, part_num, num_part_types, box_size)
    positions, velocities, accelerations, masses, interactions, ptypes = state

    # Time the full simulation; every argument is `$`-interpolated into the benchmark.
    trial = @benchmark CUDAmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt,
        $positions, $velocities, $accelerations, $masses, $interactions, $ptypes,
        $box_size, $periodic, true,
    )
    return trial
end

benchmark_CUDA (generic function with 1 method)

In [22]:
## PRE-BENCHMARK ##

In [23]:
# Warm-up call on a tiny system. Going by the argument order used in benchmark_CUDA,
# the literals are dim = 3, part_num = 10, num_part_types = 2, box_size = 10.
# NOTE(review): presumably run so that compilation happens before the real benchmark — confirm.
@time pos, vel, acc, masses, interactions, ptypes = CUDAmd.random_data(3, 10, 2, 10);

  0.837299 seconds (1.87 M allocations: 93.244 MiB, 1.15% gc time)


In [24]:
# Warm-up run of the CUDA simulation (3 steps, sinterval 1) on the tiny system from the previous cell.
# NOTE(review): that system was generated with box size 10, while the global box_size (1e3)
# is passed here — confirm the mismatch is intentional for a warm-up.
@time CUDAmd.dynamics_sim!(3, 1, false, 0.01, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  5.698255 seconds (14.39 M allocations: 719.400 MiB, 3.56% gc time)


In [25]:
# CUDA test, 100 particles
benchmark_CUDA(100)

BenchmarkTools.Trial: 
  memory estimate:  4.58 MiB
  allocs estimate:  137027
  --------------
  minimum time:     132.411 ms (0.00% GC)
  median time:      406.847 ms (0.00% GC)
  mean time:        387.640 ms (0.06% GC)
  maximum time:     419.547 ms (0.00% GC)
  --------------
  samples:          13
  evals/sample:     1

In [26]:
# CUDA test 500 particelle
#benchmark_CUDA(500)

In [27]:
# CUDA test 1000 particelle
#benchmark_CUDA(1000)