# Loading libraries

In [None]:
# import Pkg; Pkg.add(["Colors", "Plots", "Libdl", "BenchmarkTools", "Statistics", "Measurements"])
using Colors, Plots, Libdl, BenchmarkTools, Statistics, Measurements

## Random numbers and random matrices

In [None]:
rand(5)

In [None]:
?rand

In [None]:
randn(5)

In [None]:
rand(3, 3)

In [None]:
randn(3, 3)

In [None]:
rand(3, 3, 3)

In [None]:
randn(3, 3, 3)

In [None]:
1:7

In [None]:
rand(1:7, 3, 3)

In [None]:
palette = distinguishable_colors(20)

In [None]:
rand(palette, 5, 5)

## Defining new `Numbers` types
![Julia hierarchy of numbers](../SeminarPictures/Julia-number-type-hierarchy.svg)

In [None]:
subtypes(AbstractFloat)

In [None]:
a = measurement(4.5, 0.5)

## $b = 3 \pm 0.2$

In [None]:
a = 4.5 ± 0.1
b = 4.5 ± 0.1

In [None]:
a + b

In [None]:
a - b

In [None]:
a - a

In [None]:
typeof(a)

## There are lies, big lies and benchmarks

Examples taken from
[*Julia is fast*](https://github.com/JuliaComputing/JuliaBoxTutorials/blob/master/introductory-tutorials/intro-to-julia/09.%20Julia%20is%20fast.ipynb) at the JuliaComputing GitHub account. They are based on a [lecture](https://github.com/mitmath/18S096/blob/master/lectures/lecture1/Boxes-and-registers.ipynb) by Steven G. Johnson from MIT. He is the creator of the Fastest Fourier Transform in the West (FFTW).

We test the speed of Julia using the simple case of a function summing the elements of a vector $V$ of $N = 10^{ 7 }$ random `Float64` values from $[ 0, 1 )$.
\begin{equation}
    \textrm{sum}(V) = \sum_{ i = 1 }^{ N } V_{ i }
\end{equation}

## Functions in Julia are faster than "free" code.

In [None]:
gr()
V = rand(10^7)  # 1D vector of random numbers, uniform on [0,1)

In [None]:
sum(V) / 10^7

## 1. The C language, by `gcc`

### I didn't write this C code.

In [None]:
# C source of a plain summation loop over `n` doubles; it is compiled below and
# then called from Julia through `ccall`.
C_code = """
#include <stddef.h>
double c_sum(size_t n, double *X) {
    double s = 0.0;
    for (size_t i = 0; i < n; ++i) {
        s += X[i];
    }
    return s;
}
"""

const Clib = tempname()   # unique temporary path used as the base name of the library


# compile to a shared library by piping C_code to gcc
# (works only if you have gcc installed):
# the output file is Clib plus the platform's shared-library extension (Libdl.dlext);
# the trailing `-` makes gcc read the source from the pipe, and `-xc` marks it as C.

open(`gcc -fPIC -O3 -msse3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
    print(f, C_code) 
end

# define a Julia function that calls the C function:
# `length(X)` is passed as Csize_t and `X` as Ptr{Float64}; the result is a Float64.
# NOTE(review): the library is referenced as `Clib`, without the extension that was
# appended when compiling — this relies on the loader adding it; confirm.
c_sum(X::Array{Float64}) = ccall(("c_sum", Clib), Float64, (Csize_t, Ptr{Float64}), length(X), X)

### Approximate agreement with the built-in `sum`

In [None]:
(c_sum(V) - sum(V)) / 10^7

In [None]:
c_bench = @benchmark c_sum($V)

## We create an efficient dictionary

In [None]:
d_min = Dict("C" => minimum(c_bench.times) / 1e6)
d_med = Dict("C" => median(c_bench.times) / 1e6)

In [None]:
t_c = c_bench.times / 1e6

In [None]:
min_t = 3.0
max_t = 30.0

histogram(t_c, bins=500,
             title="Benchmarks for hand-written C",
             xlim=(min_t, max_t),
             xlabel="milliseconds", ylabel="number of events", label="")

## 2. C with `gcc -ffast-math`

### I didn't write this code either.

In [None]:
const Clib_fastmath = tempname()   # unique temporary path used as the base name of the library

# The same as above but with a -ffast-math flag added
# (the same C_code string is piped to gcc; only the compiler flags differ)
open(`gcc -fPIC -O3 -msse3 -xc -shared -ffast-math -o $(Clib_fastmath * "." * Libdl.dlext) -`, "w") do f
    print(f, C_code) 
end

# define a Julia function that calls the C function:
# `length(X)` is passed as Csize_t and `X` as Ptr{Float64}; the result is a Float64.
# NOTE(review): the library is referenced as `Clib_fastmath`, without the extension
# that was appended when compiling — this relies on the loader adding it; confirm.
c_sum_fastmath(X::Array{Float64}) = ccall(("c_sum", Clib_fastmath), Float64, (Csize_t, Ptr{Float64}), length(X), X)

In [None]:
c_fastmath_bench = @benchmark $c_sum_fastmath($V)
d_min["C -ffast-math"] = minimum(c_fastmath_bench.times) / 1e6  # in milliseconds
d_med["C -ffast-math"] = median(c_fastmath_bench.times) / 1e6

t_c_fastmath = c_fastmath_bench.times / 1e6 # times in milliseconds

In [None]:
min_t = 3.0
max_t = 30.0

histogram(t_c_fastmath, bins=500,
             title="Benchmarks for hand-written C with -ffast-math",
             xlim=(min_t, max_t),
             xlabel="milliseconds", ylabel="number of events", label="")

## 3. Julia built-in `sum`

In [None]:
@which sum(V)  # Shows the method that gets called; its source on GitHub will probably tell you little

In [None]:
j_bench = @benchmark sum($V)
d_min["Julia built-in"] = minimum(j_bench.times) / 1e6  # in milliseconds
d_med["Julia built-in"] = median(j_bench.times) / 1e6

t_j = j_bench.times / 1e6 # times in milliseconds


histogram(t_j, bins=500,
             title="Benchmarks for Julia built-in sum function",
             xlim=(min_t, max_t),
             xlabel="milliseconds", ylabel="number of events", label="")

In [None]:
min_t = 3.0
max_t = 10.0

histogram(t_j, bins=500,
             title="Benchmarks for Julia built-in sum function",
             xlim=(min_t, max_t),
             xlabel="milliseconds", ylabel="number of events", label="")

## 4. Julia hand-written `my_sum`

In [None]:
"""
    my_sum(V)

Return the sum of the elements of `V`, accumulated one element at a time in
iteration order. The accumulator starts at `zero(eltype(V))`, so an empty `V`
yields the zero of its element type.
"""
function my_sum(V)
    total = zero(eltype(V))  # accumulator has the element type from the start
    for x in V
        total += x
    end
    return total
end

j_bench_hand = @benchmark my_sum($V)
d_min["Julia hand-written"] = minimum(j_bench_hand.times) / 1e6  # in milliseconds
d_med["Julia hand-written"] = median(j_bench_hand.times) / 1e6

t_my_sum = j_bench_hand.times / 1e6  # times in milliseconds

histogram(t_my_sum, bins=500,
                    title="Benchmarks for Julia hand-written sum function",
                    xlim=(min_t, max_t),
                    xlabel="milliseconds", ylabel="number of events", label="")

In [None]:
min_t = 3.0
max_t = 30.0

histogram(t_my_sum, bins=500,
             title="Benchmarks for Julia hand-written sum function",
             xlim=(min_t, max_t),
             xlabel="milliseconds", ylabel="number of events", label="")

## 5. Julia hand-written `my_sum_simd` with the `@simd` macro

In [None]:
?@simd

In [None]:
"""
    my_sum_simd(V)

Return the sum of the elements of `V`, using the same accumulation loop as
`my_sum` but with the loop annotated with `@simd`. The accumulator starts at
`zero(eltype(V))`.
"""
function my_sum_simd(V)
    total = zero(eltype(V))  # accumulator has the element type from the start
    @simd for x in V
        total += x
    end
    return total
end

# Benchmark `my_sum_simd` on `V` and record its minimum and median run times
# in the shared result dictionaries.
j_bench_hand_simd = @benchmark my_sum_simd($V)
d_min["Julia hand-written simd"] = minimum(j_bench_hand_simd.times) / 1e6  # in milliseconds
d_med["Julia hand-written simd"] = median(j_bench_hand_simd.times) / 1e6

t_simd = j_bench_hand_simd.times / 1e6  # times in milliseconds

# Distribution of the individual sample times; the title names the macro that
# was actually used (`@simd`).
histogram(t_simd, bins=500,
             title="Benchmarks for Julia hand-written sum function with @simd",
             xlim=(min_t, max_t),
             xlabel="milliseconds", ylabel="count", label="")

## Summary of benchmarking

In [None]:
println("Median of time of computing sum(V)")
for (key, value) in sort(collect(d_med), by=last)
    println(rpad(key, 26, "."), lpad(round(value; digits=5), 6, "."))
end

println("\n\n\nMinimal time of computing sum(V)")
for (key, value) in sort(collect(d_min), by=last)
    println(rpad(key, 26, "."), lpad(round(value; digits=5), 6, "."))
end

In [None]:
println("Julia hand-written my_sum have median $(round(100 * d_med["C"] / d_med["Julia hand-written"]; digits=2))% of speed of hand-written C speed.")

In [None]:
?round

## Bad code is slow, good code is, mostly, fast

In [None]:
v = rand(10^7)

# Deliberately slow variant, kept for the comparison with `sum_good`.
# The accumulator is initialised from `eltype(v)` — lowercase `v`, the global
# vector defined above — instead of from the argument `V`. A non-`const` global
# is untyped, so the type of `s` cannot be inferred from the argument types.
function sum_bad(V)
    s = zero(eltype(v))  # reads the global `v`, not the parameter `V`
    for v in V  # this `v` is the loop variable, local to the loop
        s += v
    end
    s
end

"""
    sum_good(V)

Return the sum of the elements of `V`. The accumulator is initialised from the
element type of the argument itself (`zero(eltype(V))`), so the function depends
only on its argument and not on any global variable.
"""
function sum_good(V)
    acc = zero(eltype(V))  # derived from the parameter `V`
    for x in V
        acc += x
    end
    return acc
end

In [None]:
sum_bad_bench = @benchmark sum_bad($v)
sum_good_bench = @benchmark sum_good($v)

t_bad = sum_bad_bench.times / 1e6
t_good = sum_good_bench.times / 1e6

# Summary statistics (minimum, median, maximum) of the sample times of `sum_bad`.
bad_dict = Dict("Minimum time" => minimum(t_bad))
bad_dict["Median time"] = median(t_bad)
bad_dict["Maximum time"] = maximum(t_bad)

# The same statistics for `sum_good`; every entry is computed from `t_good`
# (the maximum was previously taken from `t_bad` by mistake).
good_dict = Dict("Minimum time" => minimum(t_good))
good_dict["Median time"] = median(t_good)
good_dict["Maximum time"] = maximum(t_good)

In [None]:
t₀ = 0.0
t₁ = bad_dict["Maximum time"] + 50


histogram(t_good, bins=4,
                 title="Good Julia vs bad Julia",
                 xlim=(t₀, t₁),
                 ylim=(-5, 100),
                 xlabel="miliseconds", ylabel="count", label="Good code")

histogram!(t_bad, bins=4,
                   label="Bad code")

In [None]:
println("Median time of bad code is $(bad_dict["Median time"] / good_dict["Median time"]) times bigger than that of good code.")

In [None]:
println("Results for sum_bad in miliseconds [ms]")
for (key, value) in sort(collect(bad_dict), by=last)
    println(rpad(key, 26, "."), lpad(round(value; digits=5), 6, "."))
end

println("\n\n\nResults for sum_good in miliseconds [ms]")
for (key, value) in sort(collect(good_dict), by=last)
    println(rpad(key, 26, "."), lpad(round(value; digits=5), 6, "."))
end