In [1]:
# Zeroing an array as quickly as possible appears to be an important problem.

In [2]:
# Built-in version: delegate to `fill!`, which overwrites every element of `a`
# in place with the value 0. Returns `nothing`.
zeros_fast1!(a) = (fill!(a, 0); nothing)

zeros_fast1! (generic function with 1 method)

In [3]:
# Broadcast version: take the zero of the array's element type and assign it
# to every element of `a` with an in-place broadcast (`.=`). Returns `nothing`.
function zeros_fast2!(a)
    zero_value = zero(eltype(a))
    a .= zero_value
    return nothing
end

zeros_fast2! (generic function with 1 method)

In [4]:
# SIMD version: explicit loop over `eachindex(a)` that stores the element
# type's zero into each slot. `@inbounds` disables bounds checking (indices
# come from `eachindex`, so they are valid) and `@simd` allows the loop to be
# vectorized. Returns `nothing`.
function zeros_fast3!(a)
    zero_value = zero(eltype(a))
    @inbounds @simd for idx in eachindex(a)
        a[idx] = zero_value
    end
    return nothing
end

zeros_fast3! (generic function with 1 method)

In [5]:
# TODO: Multi-threaded versions 

In [6]:
# Benchmark
using BenchmarkTools

In [7]:
# Benchmark fixtures: three separate uninitialized Float32 vectors of length N,
# one per implementation, so each benchmark works on its own buffer.
# `@time` reports the cost of the allocations themselves.
N = 1_000_000
@time begin
    a = Vector{Float32}(undef, N)
    b = similar(a)
    c = similar(a)
end;

  0.017252 seconds (1.01 k allocations: 11.502 MiB, 73.40% gc time)


In [8]:
# Benchmark the `fill!`-based version on `a`. `$a` interpolates the global
# array into the benchmark expression, as BenchmarkTools recommends.
@benchmark zeros_fast1!($a)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     74.700 μs (0.00% GC)
  median time:      230.700 μs (0.00% GC)
  mean time:        230.606 μs (0.00% GC)
  maximum time:     907.301 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [9]:
# Benchmark the broadcast version on `b`. `$b` interpolates the global
# array into the benchmark expression, as BenchmarkTools recommends.
@benchmark zeros_fast2!($b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     82.201 μs (0.00% GC)
  median time:      223.300 μs (0.00% GC)
  mean time:        223.022 μs (0.00% GC)
  maximum time:     421.100 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [10]:
# Benchmark the `@simd`/`@inbounds` loop version on `c`. `$c` interpolates the
# global array into the benchmark expression, as BenchmarkTools recommends.
@benchmark zeros_fast3!($c)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     68.299 μs (0.00% GC)
  median time:      208.799 μs (0.00% GC)
  mean time:        208.218 μs (0.00% GC)
  maximum time:     348.900 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1