## C03 Making Fast Function Calls
### Using Globals
> Don't use globals in performance critical parts of code

#### The trouble with globals
Compiler has been unable to infer the type of result when working with the global variable, marking it as `Any`

In [1]:
using BenchmarkTools

In [2]:
p = 2
function pow_array(x::Vector{Float64})
    s = 0.0
    for y in x
        s += y^p
    end
    return s
end

pow_array (generic function with 1 method)

In [3]:
t = rand(100000);
@btime pow_array(t);

  3.891 ms (300000 allocations: 4.58 MiB)


In [4]:
@code_warntype pow_array(t)

MethodInstance for pow_array(::Vector{Float64})
  from pow_array([90mx[39m::[1mVector[22m[0m{Float64})[90m @[39m [90mMain[39m [90m~/code/Julia-HPC/[39m[90m[4m03-Functions.ipynb:2[24m[39m
Arguments
  #self#

[36m::Core.Const(pow_array)[39m
  x[36m::Vector{Float64}[39m
Locals
  @_3[33m[1m::Union{Nothing, Tuple{Float64, Int64}}[22m[39m
  s[91m[1m::Any[22m[39m
  y[36m::Float64[39m
Body[91m[1m::Any[22m[39m
[90m1 ─[39m

       

(s = 0.0)


[90m│  [39m %2  = x[36m::Vector{Float64}[39m
[90m│  [39m       (@_3 = Base.iterate(%

2))
[90m│  [39m %4  = (@_3 === nothing)[36m::Bool[39m
[90m│  [39m %5  = Base.not_int(%4)[36m::Bool[39m
[90m└──[39m       goto #4 if not %5
[90m2 ┄[39m %7  = @_3[36m::Tuple{Float64, Int64}[39m
[90m│  [39m       (y = Core.getfield(%7, 1))
[90m│  [39m %9  = Core.getfield(%7, 2)[36m::Int64[39m
[90m│  [39m %10 = s[91m[1m::Any[22m[39m
[90m│  [39m %11 = (y ^ Main.p)[91m[1m::Any[22m[39m
[90m│  [39m       (s = %10 + %11)
[90m│  [39m       (@_3 = Base.iterate(%2, %9))
[90m│  [39m %14 = (@_3 === nothing)[36m::Bool[39m
[90m│  [39m %15 = Base.not_int(%14)[36m::Bool[39m
[90m└──[39m       goto #4 if not %15
[90m3 ─[39m       goto #2
[90m4 ┄[39m       return s



#### Fixing performance issues with globals
There are two ways to correct the issues of globals
1. `const p`: which will constrain the type of variable p.
2. `pow(array::Vector{Float64}, p)`: make the `p` being the argument of function, during which the type of `p` will be inference.

In [5]:
const p2 = 2
function pow_array2(x::Vector{Float64})
    s = 0.0
    for y in x
        s += y^p2
    end
    return s
end

pow_array2 (generic function with 1 method)

In [6]:
@btime pow_array2(t);

  440.912 μs (0 allocations: 0 bytes)


In [7]:
@code_warntype pow_array2(t)

MethodInstance for pow_array2(::Vector{Float64})
  from pow_array2([90mx[39m::[1mVector[22m[0m{Float64})[90m @[39m [90mMain[39m [90m~/code/Julia-HPC/[39m[90m[4m03-Functions.ipynb:2[24m[39m
Arguments
  #self#[36m::Core.Const(pow_array2)[39m
  x[36m::Vector{Float64}[39m
Locals
  @_3[33m[1m::Union{Nothing, Tuple{Float64, Int64}}[22m[39m
  s[36m::Float64[39m
  y[36m::Float64[39m
Body[36m::Float64[39m
[90m1 ─[39m       (s = 0.0)
[90m│  [39m %2  = x[36m::Vector{Float64}[39m
[90m│  [39m       (@_3 = Base.iterate(%2))
[90m│  [39m %4  = (@_3 === nothing)[36m::Bool[39m
[90m│  [39m %5  = Base.not_int(%4)[36m::Bool[39m
[90m└──[39m       goto #4 if not %5
[90m2 ┄[39m %7  = @_3[36m::Tuple{Float64, Int64}[39m
[90m│  [39m       (y = Core.getfield(%7, 1))
[90m│  [39m %9  = Core.getfield(%7, 2)[36m::Int64[39m
[90m│  [39m %10 = s[36m::Float64[39m
[90m│  [39m %11 = (y ^ Main.p2)[36m::Float64[39m
[90m│  [39m       (s = %10 + %11)
[90m│  [

In [8]:
function pow_array3(x::Vector{Float64})
    return pow_array_inner(x, p)
end

function pow_array_inner(x, pow)
    s = 0.0
    for y in x
        s += y^pow
    end
    return s
end

pow_array_inner (generic function with 1 method)

In [9]:
@btime pow_array3(t);

  313.248 μs (1 allocation: 16 bytes)


### Inlining
#### Default Inlining


In [10]:
function f(x)
    a = x * 5
    b = a + 3
end
g(x) = f(2 * x)

g (generic function with 1 method)

In [14]:
@code_typed g(3)

CodeInfo(
[90m1 ─[39m %1 = Base.mul_int(2, x)[36m::Int64[39m
[90m│  [39m %2 = Base.mul_int(%1, 5)[36m::Int64[39m
[90m│  [39m %3 = Base.add_int(%2, 3)[36m::Int64[39m
[90m└──[39m      return %3
) => Int64

In [15]:
@code_llvm g(3)

[90m;  @ /home/zpp/code/Julia-HPC/03-Functions.ipynb:5 within `g`[39m
[95mdefine[39m [36mi64[39m [93m@julia_g_1771[39m[33m([39m[36mi64[39m [95msignext[39m [0m%0[33m)[39m [0m#0 [33m{[39m
[91mtop:[39m
[90m; ┌ @ /home/zpp/code/Julia-HPC/03-Functions.ipynb:2 within `f`[39m
[90m; │┌ @ int.jl:88 within `*`[39m
    [0m%1 

[0m= [96m[1mmul[22m[39m [36mi64[39m [0m%0[0m, [33m10[39m
[90m; │└[39m
[90m; │ @ /home/zpp/code/Julia-HPC/03-Functions.ipynb:3 within `f`[39m
[90m; │┌ @ int.jl:87 within `+`[39m
    [0m%2 [0m= [96m[1madd[22m[39m [36mi64[39m [0m%1[0m, [33m3[39m
[90m; └└[39m
  [96m[1mret[22m[39m [36mi64[39m [0m%2
[33m}[39m


#### Controlling inlining
- `@inline`: make function be inlining
- `@noinline`: make function not be inlining

In [18]:
@noinline function f(x)
    a = x * 5
    b = a + 3
    c = a - 4
    if c < 0
        throw(DomainError())
    elseif c < 2
        d = c^3
    else
        d = c^2
    end
end


f (generic function with 1 method)

In [19]:
@code_typed g(3)

CodeInfo(
[90m1 ─[39m %1 = Base.mul_int(2, x)[36m::Int64[39m
[90m│  [39m %2 = invoke Main.f(%1::Int64)[36m::Int64[39m
[90m└──[39m      return %2
) => Int64

In [23]:
@inline function f_in(x)
    a = x * 5
    b = a + 3
    c = a - 4
    if c < 0
        throw(DomainError())
    elseif c < 2
        d = c^3
    else
        d = c^2
    end
end

g_in(x) = f_in(2 * x)

g_in (generic function with 1 method)

In [24]:
@code_typed g_in(3)

CodeInfo(
[90m1 ─[39m %1  = Base.mul_int(2, x)[36m::Int64[39m
[90m│  [39m %2  = Base.mul_int(%1, 5)[36m::Int64[39m
[90m│  [39m %3  = Base.sub_int(%2, 4)[36m::Int64[39m
[90m│  [39m %4  = Base.slt_int(%3, 0)[36m::Bool[39m
[90m└──[39m       goto #3 if not %4
[90m2 ─[39m       Main.DomainError()[90m::Union{}[39m
[90m└──[39m       unreachable
[90m3 ─[39m %8  = Base.slt_int(%3, 2)[36m::Bool[39m
[90m└──[39m       goto #5 if not %8
[90m4 ─[39m %10 = Base.mul_int(%3, %3)[36m::Int64[39m
[90m│  [39m %11 = Base.mul_int(%10, %3)[36m::Int64[39m
[90m└──[39m       goto #6
[90m5 ─[39m %13 = Base.mul_int(%3, %3)[36m::Int64[39m
[90m└──[39m       goto #6
[90m6 ┄[39m %15 = φ (#4 => %11, #5 => %13)[36m::Int64[39m
[90m└──[39m       return %15
) => Int64

In [39]:
@btime g(3);
@btime g_in(3);

  1.264 ns (0 allocations: 0 bytes)


  1.263 ns (0 allocations: 0 bytes)


### Constant propagation


In [47]:
sqr(x) = x * x;
sqr2() = sqr(2)

sqr2 (generic function with 1 method)

In [46]:
@code_typed sqr(2)

CodeInfo(
[90m1 ─[39m %1 = Base.mul_int(x, x)[36m::Int64[39m
[90m└──[39m      return %1
) => Int64

In [41]:
@code_typed sqr2()

CodeInfo(
[90m1 ─[39m     return 4
) => Int64