Title: Julia computations benchmark

# [f(x,y) for x in xs, y in ys]

In [1]:
using BenchmarkTools

In [5]:
# Scalar kernel used by the benchmarks in this section. Despite the `!`
# suffix it does not mutate its arguments; it only returns sin(x) + cos(y).
f!(x,y) = sin(x)+cos(y)
# 1000x1000 output buffer that the loop benchmarks write into.
C = zeros(1000,1000)
# Fill `C` column by column: the inner loop runs over the first index `x`.
# NOTE(review): `C` is a non-const global and is not interpolated with `$`,
# which is presumably where the roughly two allocations per element in the
# recorded output come from -- confirm by benchmarking inside a function.
@btime for y in 1:1000
       for x in 1:1000
       C[x,y] = f!(x,y)
       end
       end

  52.310 ms (1978000 allocations: 30.18 MiB)


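for x (outer), for y (inner) — a bad choice: Julia arrays are column-major, so the innermost loop should run over the first index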

In [11]:
# Same fill with the loop order swapped: the inner loop now runs over the
# second index `y`, so consecutive stores go to different columns of `C`.
# NOTE(review): `C` is still an uninterpolated global here, so the overhead
# of working on a global may hide any difference caused by the loop order --
# confirm by benchmarking inside a function.
@btime for x in 1:1000
       for y in 1:1000
       C[x,y] = f!(x,y)
       end
       end

  52.861 ms (1978000 allocations: 30.18 MiB)


In [6]:
# Split the outer (column) loop across threads with `Threads.@threads`.
# Each outer iteration writes only to its own column `y` of `C`.
# NOTE(review): the number of threads Julia was started with is not shown in
# this notebook -- record `Threads.nthreads()` when comparing timings.
@btime Threads.@threads for y in 1:1000
       for x in 1:1000
       C[x,y] = f!(x,y)
       end
       end

  16.484 ms (1978021 allocations: 30.18 MiB)


Using multiple threads and @inbounds

In [10]:
# Threaded column-by-column fill again, now with `@inbounds` on the store so
# the bounds check on `C[x,y]` is skipped.
# Both indices stay within 1:1000, which matches `C = zeros(1000,1000)`.
@btime Threads.@threads for y in 1:1000
       for x in 1:1000
       @inbounds C[x,y] = f!(x,y)
       end
       end

  14.812 ms (1978021 allocations: 30.18 MiB)


Using an array comprehension

In [8]:
# Build a new 1000x1000 matrix with a two-dimensional array comprehension
# instead of writing into `C`; `x` indexes rows and `y` indexes columns.
@btime [f!(x,y) for x in 1:1000, y in 1:1000];

  25.285 ms (2 allocations: 7.63 MiB)


In [14]:
using LoopVectorization
# Same column-by-column fill of the global `C`, with the loop nest wrapped in
# LoopVectorization's `@turbo` macro.
# NOTE(review): `@turbo` presumably assumes in-bounds indices and independent
# iterations, which holds for this loop -- confirm against the
# LoopVectorization documentation.
@btime @turbo for y in 1:1000
    for x in 1:1000
         C[x, y] = f!(x, y)
    end
end

  7.300 ms (11 allocations: 320 bytes)


In [19]:
# Fill `C` in place with `f!(x, y)` using a `@turbo` loop nest, wrapped in a
# function so the array is passed as an argument rather than read as a global.
# The loop bounds are hard-coded to 1:1000 on both axes, so `C` must be at
# least 1000x1000.
function test!(C)
 @turbo for y in 1:1000
    for x in 1:1000
         C[x, y] = f!(x, y)
    end
end
end

# `$C` interpolates the global array into the benchmark expression.
@btime test!($C)

  7.423 ms (0 allocations: 0 bytes)


In [22]:
# Redefinition of `test!`: an in-place fill of `C` with `f!(x, y)` through a
# `@turbo` loop nest. The loop bounds are hard-coded to 1:1000 on both axes,
# so `C` must be at least 1000x1000.
function test!(C)
  @turbo for y in 1:1000
    for x in 1:1000
         C[x, y] = f!(x, y)
    end
end
end

# `$C` interpolates the global array into the benchmark expression.
@btime test!($C)

  7.266 ms (0 allocations: 0 bytes)


# Functions as inputs

In [7]:
using BenchmarkTools
"""
    f(x, y)

Return `sin(x) * cos(y)` for scalar `x` and `y`.
"""
f(x, y) = sin(x) * cos(y)
"""
    decouple(x, y)

Overwrite `y` element-wise with `f(x, y) + exp(x) * cos(y)` and return `y`.

The helper `f` is looked up at top level rather than defined inside this
function.
"""
function decouple(x, y)
    # Fully dotted expression: a single fused pass that writes into `y`.
    y .= f.(x, y) .+ exp.(x) .* cos.(y)
    return y
end

"""
    couple(x, y)

Overwrite `y` element-wise with `sin(x) * cos(y) + exp(x) * cos(y)` and
return `y`.

The product term comes from a helper defined inside this function, so the
function does not depend on any top-level helper.
"""
function couple(x, y)
    # Local helper, visible only inside `couple`.
    f1(a, b) = sin(a) * cos(b)
    # Fully dotted expression: a single fused pass that writes into `y`.
    y .= f1.(x, y) .+ exp.(x) .* cos.(y)
    return y
end

"""
    inputf(f::Function, x, y)

Overwrite `y` element-wise with `f(x, y) + exp(x) * cos(y)` and return `y`.

The two-argument function `f` is supplied by the caller.
"""
function inputf(f::Function, x, y)
    # NOTE(review): `f` is only handed to the broadcast machinery and never
    # called directly in this body; Julia may skip specializing on it in that
    # situation -- confirm whether that explains any extra allocations.
    y .= f.(x, y) .+ exp.(x) .* cos.(y)
    return y
end
# Two 100x100 matrices of normally distributed random numbers used as inputs.
x = randn(100,100);
y = randn(100,100)
# `decouple` overwrites `y` in place, so the contents of `y` change from one
# benchmark evaluation to the next.
@btime decouple($x,$y);

  298.500 μs (0 allocations: 0 bytes)


In [9]:
# Benchmark the variant whose helper is defined inside the function.
@btime couple($x,$y);

  300.100 μs (0 allocations: 0 bytes)


In [15]:
# Benchmark the variant that receives the helper as an argument. `x` and `y`
# are interpolated with `$`; `f` is passed by name.
@btime inputf(f,$x,$y);

  301.500 μs (5 allocations: 192 bytes)


In [17]:
# Show the type-inferred code for this call.
# NOTE(review): per the Julia performance tips, `@code_typed` displays
# specialized code even when the compiler would not specialize on a function
# argument in practice, so this output may not match what the benchmark ran
# -- confirm.
@code_typed inputf(f, x, y)

CodeInfo(
[90m1 ───[39m %1   = Core.tuple(x)[36m::Tuple{Matrix{Float64}}[39m
[90m│    [39m %2   = %new(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(exp), Tuple{Matrix{Float64}}}, exp, %1, nothing)[36m::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(exp), Tuple{Matrix{Float64}}}[39m
[90m│    [39m %3   = Core.tuple(y)[36m::Tuple{Matrix{Float64}}[39m
[90m│    [39m %4   = %new(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(cos), Tuple{Matrix{Float64}}}, cos, %3, nothing)[36m::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(cos), Tuple{Matrix{Float64}}}[39m
[90m│    [39m %5   = Core.tuple(%2, %4)[36m::Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(exp), Tuple{Matrix{Float64}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(cos), Tuple{Matrix{Float64}}}}[39m
[90m│    