In [1]:
# Zeroing an array as fast as possible seems to be an important matter.

In [2]:
# Built-in function
"""
    zeros_fast1!(a)

Overwrite every element of `a` with zero, in place, by delegating to `fill!`.

The literal `0` is converted to the element type of `a` by `fill!`.
Returns `nothing`.
"""
zeros_fast1!(a) = (fill!(a, 0); nothing)

zeros_fast1! (generic function with 1 method)

In [3]:
# Broadcast version
"""
    zeros_fast2!(a)

Set every element of `a` to `zero(eltype(a))`, in place, with a broadcast assignment.

Returns `nothing`.
"""
function zeros_fast2!(a)
    z = zero(eltype(a))
    a .= z
    return nothing
end

zeros_fast2! (generic function with 1 method)

In [4]:
# SIMD version with no bounds checking
"""
    zeros_fast3!(a)

Set every element of `a` to `zero(eltype(a))`, in place, with an explicit `@simd` loop
whose stores skip bounds checking.

Returns `nothing`.
"""
function zeros_fast3!(a)
    T = eltype(a)
    # Indices come from `eachindex(a)`, so they are valid for `a` and `@inbounds` is safe.
    @inbounds @simd for idx in eachindex(a)
        a[idx] = zero(T)
    end
    return nothing
end

zeros_fast3! (generic function with 1 method)

In [None]:
# TODO: Multi-threaded versions 

In [5]:
# Benchmark
using BenchmarkTools

In [6]:
# Number of Float32 elements in each benchmark array.
N = 1_000_000
# Allocate one array per variant so each benchmark works on its own buffer.
# `undef` leaves the contents uninitialized; they are arbitrary until zeroed.
@time begin
    a = Vector{Float32}(undef, N)
    b = Vector{Float32}(undef, N)
    c = Vector{Float32}(undef, N)
end

  0.007472 seconds (1.01 k allocations: 11.502 MiB, 75.92% gc time)


1000000-element Array{Float32,1}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮  
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [7]:
# Benchmark the `fill!`-based variant on `a`. `$a` interpolates the global array into
# the benchmark expression so the untyped global lookup is not part of the measurement.
@benchmark zeros_fast1!($a)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     73.800 μs (0.00% GC)
  median time:      74.900 μs (0.00% GC)
  mean time:        76.590 μs (0.00% GC)
  maximum time:     751.000 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [8]:
# Benchmark the broadcast variant on `b`. `$b` interpolates the global array into
# the benchmark expression so the untyped global lookup is not part of the measurement.
@benchmark zeros_fast2!($b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     75.799 μs (0.00% GC)
  median time:      76.200 μs (0.00% GC)
  mean time:        77.286 μs (0.00% GC)
  maximum time:     243.701 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [9]:
# Benchmark the `@simd`/`@inbounds` loop variant on `c`. `$c` interpolates the global array
# into the benchmark expression so the untyped global lookup is not part of the measurement.
@benchmark zeros_fast3!($c)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     70.199 μs (0.00% GC)
  median time:      70.401 μs (0.00% GC)
  mean time:        71.174 μs (0.00% GC)
  maximum time:     194.100 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1