# Benchmark

In [1]:
using BenchmarkTools

In [2]:
# Original parameter values from Crabb's test.
# Float32 literal (1000.0f0); passed as the box-size argument of random_data and dynamics_sim!.
const box_size = 1f3;
# Passed as the first argument of random_data (presumably the number of spatial dimensions).
const dim = 3;
# Float32 literal; passed to dynamics_sim! (presumably the integration time step — confirm).
const dt = 0.01f0;
# Passed to random_data (presumably the number of distinct particle types).
const num_part_types = 2;
# Step count handed to dynamics_sim! by the benchmark functions.
const nsteps = 1000;
# Passed to dynamics_sim! right after nsteps (presumably a sampling/save interval — confirm).
const sinterval = 10;
# Passed to dynamics_sim! (presumably toggles periodic boundary conditions — confirm).
const periodic = true;

### Serial version benchmark

In [3]:
include("src/serialmd.jl")

Main.Serialmd

In [4]:
"""
    benchmark_serial(part_num)

Benchmark `Serialmd.dynamics_sim!` on a random system of `part_num` particles and
return the resulting `BenchmarkTools.Trial`.

The initial state is produced by `Serialmd.random_data` from the notebook constants
`dim`, `num_part_types` and `box_size`; the simulation is run with `nsteps`,
`sinterval`, `dt` and `periodic`.

Every sample starts from a fresh deep copy of the generated `pos`, `vel` and `acc`,
so all samples time the same workload instead of continuing from the state left
behind by the previous sample.
"""
function benchmark_serial(part_num)
    # Generate the initial system state from the original test values.
    pos, vel, acc, masses, interactions, ptypes =
        Serialmd.random_data(dim, part_num, num_part_types, box_size)
    # `dynamics_sim!` presumably updates pos/vel/acc in place (`!` suffix — confirm
    # against src/serialmd.jl). The setup phase runs once per sample and is not timed;
    # `evals=1` guarantees exactly one evaluation per freshly copied state.
    # Literal `false`/`true` need no `\$` interpolation.
    return @benchmark Serialmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt, p, v, a, $masses,
        $interactions, $ptypes, $box_size, $periodic, true
    ) setup=(p = deepcopy($pos); v = deepcopy($vel); a = deepcopy($acc)) evals=1
end

benchmark_serial (generic function with 1 method)

In [5]:
## PRE-BENCHMARK ## (Compilazione JIT e Caching)

In [6]:
# Warm-up: time a small random_data call (10 particles) so that its JIT compilation
# happens before the real benchmark. The results become notebook globals.
# NOTE(review): the box size here is the Int literal 10, whereas benchmark_serial passes
# the Float32 constant box_size — this may compile a different specialization; confirm.
@time pos, vel, acc, masses, interactions, ptypes = Serialmd.random_data(3, 10, 2, 10);

  0.255078 seconds (1.17 M allocations: 59.384 MiB, 5.45% gc time)


In [7]:
# Warm-up: a very short run (3 and 1 in the nsteps/sinterval argument positions) on the
# global state currently bound to pos/vel/acc, so dynamics_sim! is compiled up front.
# NOTE(review): box_size (1f3) is passed here; if the state came from the warm-up
# random_data call it was generated with box size 10 — confirm this is intended.
@time Serialmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.091792 seconds (232.57 k allocations: 12.202 MiB)


In [8]:
# Serial test, 100 particles
benchmark_serial(100)

BenchmarkTools.Trial: 
  memory estimate:  212.34 KiB
  allocs estimate:  1003
  --------------
  minimum time:     356.469 ms (0.00% GC)
  median time:      359.805 ms (0.00% GC)
  mean time:        360.634 ms (0.00% GC)
  maximum time:     375.805 ms (0.00% GC)
  --------------
  samples:          14
  evals/sample:     1

In [9]:
# Serial test, 500 particles
#benchmark_serial(500)

In [10]:
# Serial test, 1000 particles
#benchmark_serial(1000)

### CPU Multi Thread Version Benchmark

In [11]:
include("src/CPUMTmd.jl")

Main.CPUMTmd

In [12]:
# Print the thread count returned by CPUMTmd.nthreads()
# (the message text is Italian for "Available threads").
println("Threads disponibili: ", CPUMTmd.nthreads())

Threads disponibili: 6


In [13]:
"""
    benchmark_CPUMT(part_num)

Benchmark the multi-threaded `CPUMTmd.dynamics_sim!` on a random system of `part_num`
particles and return the resulting `BenchmarkTools.Trial`.

The initial state is produced by `CPUMTmd.random_data` from the notebook constants
`dim`, `num_part_types` and `box_size`; the simulation is run with `nsteps`,
`sinterval`, `dt` and `periodic`.

Every sample starts from a fresh deep copy of the generated `pos`, `vel` and `acc`,
so all samples time the same workload instead of continuing from the state left
behind by the previous sample.
"""
function benchmark_CPUMT(part_num)
    # Generate the initial system state from the original test values.
    pos, vel, acc, masses, interactions, ptypes =
        CPUMTmd.random_data(dim, part_num, num_part_types, box_size)
    # No `@sync` wrapper: `Base.@sync` only waits for tasks that are *lexically* enclosed
    # in its block, so wrapping a plain function call never waited for anything started
    # inside `dynamics_sim!`. The call itself must block until the work is finished.
    # `dynamics_sim!` presumably updates pos/vel/acc in place (`!` suffix — confirm
    # against src/CPUMTmd.jl). The setup phase runs once per sample and is not timed;
    # `evals=1` guarantees exactly one evaluation per freshly copied state.
    return @benchmark CPUMTmd.dynamics_sim!(
        $nsteps, $sinterval, false, $dt, p, v, a, $masses,
        $interactions, $ptypes, $box_size, $periodic, true
    ) setup=(p = deepcopy($pos); v = deepcopy($vel); a = deepcopy($acc)) evals=1
end

benchmark_CPUMT (generic function with 1 method)

In [14]:
## PRE-BENCHMARK ## (Compilazione JIT e Caching)

In [15]:
# Warm-up: time a small CPUMTmd.random_data call (10 particles) so that its JIT
# compilation happens before the real benchmark. Rebinds the notebook globals.
# NOTE(review): the box size here is the Int literal 10, whereas benchmark_CPUMT passes
# the Float32 constant box_size — this may compile a different specialization; confirm.
@time pos, vel, acc, masses, interactions, ptypes = CPUMTmd.random_data(3, 10, 2, 10);

  0.096992 seconds (482.34 k allocations: 24.232 MiB)


In [16]:
# Warm-up: a very short run (3 and 1 in the nsteps/sinterval argument positions) on the
# global state currently bound to pos/vel/acc, so dynamics_sim! is compiled up front.
# NOTE(review): box_size (1f3) is passed here; if the state came from the warm-up
# random_data call it was generated with box size 10 — confirm this is intended.
@time CPUMTmd.dynamics_sim!(3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  0.153674 seconds (314.93 k allocations: 16.286 MiB, 3.22% gc time)


In [17]:
# CPU-MT test, 100 particles
benchmark_CPUMT(100)

BenchmarkTools.Trial: 
  memory estimate:  57.72 MiB
  allocs estimate:  3192969
  --------------
  minimum time:     122.488 ms (0.00% GC)
  median time:      152.616 ms (0.00% GC)
  mean time:        155.002 ms (6.01% GC)
  maximum time:     232.601 ms (10.49% GC)
  --------------
  samples:          33
  evals/sample:     1

In [18]:
# CPU-MT test 500 particelle
#benchmark_CPUMT(500)

In [19]:
# CPU-MT test 1000 particelle
#benchmark_CPUMT(1000)

### CUDA Version Benchmark

In [20]:
include("src/CUDAmd.jl")

└ @ CUDAnative C:\Users\enric\.julia\packages\CUDAnative\hfulr\src\CUDAnative.jl:153


Main.CUDAmd

In [21]:
"""
    benchmark_CUDA(cuThreads, part_num)

Benchmark `CUDAmd.dynamics_sim!` on a random system of `part_num` particles and return
the resulting `BenchmarkTools.Trial`.

`cuThreads` is forwarded unchanged as the first argument of `CUDAmd.dynamics_sim!`
(presumably the CUDA threads-per-block setting — confirm against src/CUDAmd.jl).
The initial state comes from `CUDAmd.random_data` using the notebook constants `dim`,
`num_part_types` and `box_size`.
"""
function benchmark_CUDA(cuThreads, part_num)
    # Build the initial system state from the original test values.
    state = CUDAmd.random_data(dim, part_num, num_part_types, box_size)
    r, v, a, m, pair_table, kinds = state
    # The simulation call is wrapped in CuArrays.@sync, as in the original cell, so that
    # the timed region includes that macro's synchronisation.
    # NOTE(review): the same r/v/a objects are reused by every sample; if dynamics_sim!
    # updates them in place, later samples start from an evolved state — confirm.
    return @benchmark begin
        CUDAmd.CuArrays.@sync(
            CUDAmd.dynamics_sim!(
                $cuThreads, $nsteps, $sinterval, $false, $dt, $r, $v, $a, $m,
                $pair_table, $kinds, $box_size, $periodic, $true
            )
        )
    end
end

benchmark_CUDA (generic function with 1 method)

In [22]:
## PRE-BENCHMARK ## (Compilazione JIT e Caching)

In [23]:
# Warm-up: time a small CUDAmd.random_data call (10 particles) so that its JIT
# compilation happens before the real benchmark. Rebinds the notebook globals.
# NOTE(review): the box size here is the Int literal 10, whereas benchmark_CUDA passes
# the Float32 constant box_size — this may compile a different specialization; confirm.
@time pos, vel, acc, masses, interactions, ptypes = CUDAmd.random_data(3, 10, 2, 10);

  0.793186 seconds (1.74 M allocations: 86.990 MiB, 2.35% gc time)


In [24]:
# Warm-up: a very short run (512 in the cuThreads position, 3 and 1 in the
# nsteps/sinterval positions) on the global state currently bound to pos/vel/acc,
# so dynamics_sim! is compiled up front.
# NOTE(review): box_size (1f3) is passed here; if the state came from the warm-up
# random_data call it was generated with box size 10 — confirm this is intended.
@time CUDAmd.dynamics_sim!(512, 3, 1, false, dt, pos, vel, acc, masses, interactions, ptypes, box_size, periodic, true);

  5.522771 seconds (13.81 M allocations: 689.178 MiB, 3.58% gc time)


In [25]:
# CUDA test, 100 particles
benchmark_CUDA(512, 100)

BenchmarkTools.Trial: 
  memory estimate:  4.52 MiB
  allocs estimate:  137026
  --------------
  minimum time:     265.270 ms (0.00% GC)
  median time:      270.344 ms (0.00% GC)
  mean time:        282.071 ms (0.06% GC)
  maximum time:     335.590 ms (0.00% GC)
  --------------
  samples:          18
  evals/sample:     1

In [26]:
# CUDA test 500 particelle
#benchmark_CUDA(512, 500)

In [27]:
# CUDA test 1000 particelle
#benchmark_CUDA(512, 1000)