In [5]:
import Base: iterate, exp, log, sin, cos, tan, +, ^, -, *, /, sqrt, convert, promote_rule, zero,max, isless
using BenchmarkTools
using Plots

In [6]:
mutable struct Dual{T <: Number} <: Number
    value::T
    dfdy::T
    parent::Union{Dual{T}, Array{Dual{T},1}, Int, Nothing}
    backpropagation!
end

In [7]:
function convert(::Type{Dual{T}}, x::T) where T <: Number
    Dual(x, zero(T), nothing, (dfdy, parents) -> nothing)
end

function convert(::Type{Dual{T}}, x::S) where {T, S <: Number}
    Dual(T(x), zero(T), nothing, (dfdy, parents) -> nothing)
end

function convert(::Type{Dual{T}}, x::Dual{T}) where T
    x
end


convert (generic function with 256 methods)

In [8]:
function zero(x::Dual{T}) where T
    Dual(zero(T), zero(T), nothing, (dfdy, parents) -> nothing)
end

zero (generic function with 37 methods)

In [10]:
function promote_rule(::Type{Dual{T}}, ::Type{S}) where {T, S <: Number}
    Dual{promote_type(T,S)}
end

function promote_rule(::Type{T}, ::Type{Dual{S}}) where {T <: Number, S}
    Dual{promote_type(T,S)}
end

function promote_rule(::Type{S}, ::Type{Dual{T}}) where {S <: AbstractIrrational, T}
    Dual{promote_type(S, T)}
end

promote_rule (generic function with 154 methods)

In [11]:
function push_parents!(queue::Array{Dual{T}, 1}, ::Nothing) where T
end
function push_parents!(queue::Array{Dual{T}, 1}, i::Int) where T
end
function push_parents!(queue::Array{Dual{T}, 1}, ls::Array{Dual{T}, 1}) where T
    append!(queue, ls)
end
function push_parents!(queue::Array{Dual{T}, 1}, l::Dual{T}) where T
    push!(queue, l)
end

push_parents! (generic function with 4 methods)

In [12]:
function backprop!(l::Dual{T}) where T
    backprop!([l])
end
function backprop!(queue::Array{Dual{T},1}) where T
    while length(queue) > 0
        l = popfirst!(queue)
        l.backpropagation!(l.dfdy, l.parent)
        push_parents!(queue, l.parent)
    end
end

backprop! (generic function with 2 methods)

In [13]:
function collect_outputs(l::Dual{T}) where T
    queue = Dual{T}[l]

    outputs = Dual{T}[]

    while length(queue) > 0
        l = popfirst!(queue)
        if typeof(l.parent) <: Int
            push!(outputs, l)
        elseif typeof(l.parent) == Dual{T}
            push!(queue, l.parent)
        elseif typeof(l.parent) == Array{Dual{T}, 1}
            append!(queue, l.parent)
        end
    end

    outputs
end


collect_outputs (generic function with 1 method)

In [14]:
function derivativeСalculation(f)
    function dfdx(x::T) where T <: Number
        x = Dual(x, zero(x), 1, (dfdy, parents) -> nothing)
        result = f(x)
        result.dfdy = one(result.value)
        backprop!(result)
        y = collect_outputs(result)[1]
        return y.dfdy
    end

    function dfdx(x::Array{T}, i) where T <: Number
        fargs = [Dual(xelt, zero(xelt), i, (dfdy, parents) -> nothing) for (i, xelt) in enumerate(x)]
        result = f(fargs)[i]
        result.dfdy = one(result.value)
        backprop!(result)
        y = collect_outputs(result)
        grad = zeros(typeof(result.value), length(x))
        for yelt in y
            grad[yelt.parent] = yelt.dfdy
        end

        return grad
    end

    function dfdx(x...)
        fargs = [Dual(xelt, zero(xelt), i, (dfdy, parents) -> nothing) for (i, xelt) in enumerate(x)]
        result = f(fargs...)
        result.dfdy = one(result.value)
        backprop!(result)
        y = collect_outputs(result)
        grad = zeros(typeof(result.value), length(x))
        for yelt in y
            grad[yelt.parent] = yelt.dfdy
        end

        return grad
    end

    return dfdx
end

derivativeСalculation (generic function with 1 method)

In [15]:
function derivativeСalculation(i::Integer, f)
    df = derivativeСalculation(f)
    function df_wrapper(x...)
        g = df(x...)
        return g[i]
    end
    return df_wrapper
end

derivativeСalculation (generic function with 2 methods)

In [16]:
function backpropagationForPlus!(dfdy, xy)
    x, y = xy
    x.dfdy += dfdy
    y.dfdy += dfdy
end

function +(x::Dual{T}, y::Dual{T}) where T
    if x == y
        new_y = Dual(y.value, y.dfdy, y.parent, y.backpropagation!)
        return Dual(x.value + y.value, zero(T), [x, new_y], backpropagationForPlus!)
    end
    Dual(x.value + y.value, zero(T), [x, y], backpropagationForPlus!)
end

function backpropagationForMinus!(dfdy, xy) 
    x, y = xy
    x.dfdy += dfdy
    y.dfdy -= dfdy
end

function -(x::Dual{T}, y::Dual{T}) where T
    Dual(x.value - y.value, zero(T), [x, y], backpropagationForMinus!)
end

function backpropagationForMinusOneDual!(dfdy, x)
    x.dfdy -= dfdy
end

function -(x::Dual{T}) where T
    Dual(-x.value, zero(T), x, backpropagationForMinusOneDual!)
end

function backpropagationForMultiplication!(dfdy, xy)
    x,y = xy    
    x.dfdy += dfdy*y.value
    y.dfdy += x.value*dfdy
end


function *(x::Dual{T}, y::Dual{T}) where T
    return Dual(x.value*y.value, zero(T), [x,y], backpropagationForMultiplication!)
end

function /(x::Dual{T}, y::Dual{T}) where T
    yinv = one(T)/y.value
    function backpropagationForDividing!(dfdy, xy)
        a,b = xy
        a.dfdy += dfdy*yinv
        b.dfdy -= a.value*dfdy*(yinv*yinv)
    end
    if x == y
        new_y = Dual(y.value, y.dfdy, y.parent, y.backpropagation!)
        return Dual(x.value*yinv, zero(T), [x, new_y], backpropagationForDividing!)
    end
    Dual(x.value*yinv, zero(T), [x,y], backpropagationForDividing!)
end

function exp(x::Dual{T}) where T
    expValue = exp(x.value)

    function bp!(dfdy, p)
        p.dfdy += dfdy*expValue
    end

    Dual(expValue, zero(expValue), x, bp!)
end

function exp(xs::Array)
    return [exp(x) for x in xs]
end

function sin(x::Dual{T}) where T
    
    function bp!(dfdy, p)
        p.dfdy += cos(x.value)*dfdy
    end

    sx = sin(x.value)
    Dual(sx, zero(sx), x, bp!)
end

function cos(x::Dual{T}) where T
    
    function bp!(dfdy, p)
        p.dfdy -= sin(x.value)*dfdy
    end

    cx = cos(x.value)
    Dual(cx, zero(cx), x, bp!)
end

function tan(x::Dual{T}) where T
    c = cos(x.value)
    
    function bp!(dfdy, p)
        p.dfdy += dfdy/(c*c)
    end

    tx = tan(x.value)
    Dual(tx, zero(tx), x, bp!)
end

function sqrt(x::Dual{T}) where T
    sqrtValue = sqrt(x.value)

    function bp!(dfdy, p)
        p.dfdy += dfdy/(2*sqrtValue)
    end

    Dual(sqrtValue, zero(sqrtValue), x, bp!)
end

function ^(a::Dual{T}, x::Dual{T}) where T
    value = a.value^x.value
    function bp!(dfdy, xy)
        a,x = xy
        a.dfdy += dfdy * (x.value) * (a.value)^(x.value - 1)
        x.dfdy += dfdy * a.value^x.value*log(a.value)
    end

    Dual(value, zero(value), [a, x], bp!)
end

function ^(x::Dual{T}, n::Integer) where T
    value = x.value^n
    function bp!(dfdy, x)
        x.dfdy += n * dfdy*x.value^(n-1)
    end
    
    Dual(value, zero(value), x, bp!)
end

function log(x::Dual{T}) where T
    
    function bp!(dfdy, p)
        p.dfdy += dfdy/x.value
    end
    
    Dual(log(x.value), zero(T), x, bp!)
end

function max(a, x::Dual{T}) where T
    
    function bp!(dfdy,p)
        p.dfdy += x.value < a ? a : 1 * dfdy
    end
    
    Dual(max(0, x.value), zero(T), x, bp!)
    
end

isless(x::Dual, y::Dual) = x.value < y.value;

In [17]:
function softmax(vector::Array)
    e = exp(vector)
    return e / sum(e)
end

softmax (generic function with 1 method)

In [18]:
J = function jacobian(f, number_of_functions, args::Vector{T}) where {T <:Number}
    jacobian_rows = Matrix{T}[]
    d = derivativeСalculation(f)  
    for i=1:number_of_functions
        rows = d(args,i)
        push!(jacobian_rows, rows[:,:])
    end
    jacobian_rows
end

jacobian (generic function with 1 method)

## Definicja funkcji testowych

In [19]:
function styblinskiTang(x)
    value = zero(x[1])
    for i=1:length(x)
        value += x[i]^4 - 16 * x[i]^2 + 5 * x[i]
    end
    value / 2
end

function rosenbrock(x)
    value = zero(x[1])
    for i=2:length(x)
        value += (1-x[i-1])^2 + 100*(x[i] - x[i-1]^2)^2
    end
    value
end

rosenbrock (generic function with 1 method)

In [20]:
function TestFunction1(x)
    f1 = 123.83*(x[2]^8)^2 + cos(x[1]^18) - sin(x[1])/exp(x[2])
end

function TestFunction2(x)
    f1 = 222.83*(x[2]^8+x[1]^-3)^2 + sin(x[1]^18) - cos(x[2])/exp(x[1])
    f2 = x[1] - 5/x[2] + 4*x[1]^2-2*x[2] / (x[2]*sin(x[2])^2 + 1)
    [f1,f2]
end

function viennet(x)
    f1 = 0.5*(x[1]^2+x[2]^2) + sin(x[1]^2+ x[2]^2)
    f2 = (x[1]-2*x[2]+4)^2/8 + (x[1]-x[2]+1)^2/27 + 15
    f3 = 1/(x[1]^2+x[2]^2+1) - 1.1* exp(-(x[1]^2+x[2]^2))
    [f1,f2,f3]
end

function TestFunction4(x)
    f1 = 222.83*(x[2]^8+x[1]^-3)^2 + sin(x[1]^18) - cos(x[2])/exp(x[1])
    f2 = (x[1]^15-234.32)^3 + (tan(x[2]^5) + sin(x[1])^98)^36 
    f3 = x[1]*x[2]^34 - tan(343)^2 - sin(x[1])^8
    f4 = (x[1]^2+x[2]-4325.43)^23 - cos(x[2])*sin(x[1])^3
    [f1,f2,f3,f4]
end

function TestFunction5(x)
    f1 = 222.83*(x[2]^8+x[1]^-3)^2 + sin(x[1]^18) - cos(x[2])/exp(x[1])
    f2 = x[1]+x[1]^232+(sqrt(x[2]))^2+ (x[1]+x[2]^3)/233
    f3 = 2342434+23*x[1]^32-tan(x[2])+ (x[2]*sin(x[2])^2 + 1)
    f4 = 4*x[1]^2-2*x[2] / (x[2]*sin(x[2])^2 + 1)
    f5 = x[1] - 5/x[2] + 4*x[1]^2-2*x[2]
    [f1,f2,f3,f4,f5]
end

TestFunction5 (generic function with 1 method)

## Testy funkcji weluzmiennych 

### Testowanie funkcji Rosenbrock

In [21]:
@benchmark J(rosenbrock,1,[rand(5,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  7.98 KiB
  allocs estimate:  190
  --------------
  minimum time:     7.233 μs (0.00% GC)
  median time:      7.654 μs (0.00% GC)
  mean time:        8.823 μs (8.66% GC)
  maximum time:     1.185 ms (98.63% GC)
  --------------
  samples:          10000
  evals/sample:     4

In [17]:
@benchmark J(rosenbrock,1,[rand(50,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  76.64 KiB
  allocs estimate:  2041
  --------------
  minimum time:     77.329 μs (0.00% GC)
  median time:      79.840 μs (0.00% GC)
  mean time:        85.987 μs (5.37% GC)
  maximum time:     2.965 ms (96.29% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [18]:
@benchmark J(rosenbrock,1,[rand(100,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  151.41 KiB
  allocs estimate:  4092
  --------------
  minimum time:     148.002 μs (0.00% GC)
  median time:      158.158 μs (0.00% GC)
  mean time:        171.302 μs (5.11% GC)
  maximum time:     3.895 ms (91.77% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [19]:
@benchmark J(rosenbrock,1,[rand(500,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  741.47 KiB
  allocs estimate:  20494
  --------------
  minimum time:     753.102 μs (0.00% GC)
  median time:      770.391 μs (0.00% GC)
  mean time:        821.326 μs (5.41% GC)
  maximum time:     4.525 ms (78.57% GC)
  --------------
  samples:          6070
  evals/sample:     1

In [20]:
@benchmark J(rosenbrock,1,[rand(1000,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  1.46 MiB
  allocs estimate:  41485
  --------------
  minimum time:     1.528 ms (0.00% GC)
  median time:      1.569 ms (0.00% GC)
  mean time:        1.668 ms (5.56% GC)
  maximum time:     5.283 ms (63.97% GC)
  --------------
  samples:          2992
  evals/sample:     1

In [21]:
@benchmark J(rosenbrock,1,[rand(5000,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  7.26 MiB
  allocs estimate:  209492
  --------------
  minimum time:     7.925 ms (0.00% GC)
  median time:      8.249 ms (0.00% GC)
  mean time:        8.805 ms (6.99% GC)
  maximum time:     12.632 ms (36.37% GC)
  --------------
  samples:          568
  evals/sample:     1

In [22]:
@benchmark J(rosenbrock,1,[rand(10000,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  14.53 MiB
  allocs estimate:  419493
  --------------
  minimum time:     16.464 ms (0.00% GC)
  median time:      17.127 ms (0.00% GC)
  mean time:        18.512 ms (9.16% GC)
  maximum time:     24.263 ms (16.97% GC)
  --------------
  samples:          270
  evals/sample:     1

### Testowanie funkcji styblinskiTang

In [22]:
@benchmark J(styblinskiTang,1,[rand(5,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  10.14 KiB
  allocs estimate:  218
  --------------
  minimum time:     8.712 μs (0.00% GC)
  median time:      9.096 μs (0.00% GC)
  mean time:        10.095 μs (7.79% GC)
  maximum time:     1.353 ms (99.06% GC)
  --------------
  samples:          10000
  evals/sample:     3

In [24]:
@benchmark J(styblinskiTang,1,[rand(50,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  73.52 KiB
  allocs estimate:  1887
  --------------
  minimum time:     69.606 μs (0.00% GC)
  median time:      71.283 μs (0.00% GC)
  mean time:        76.601 μs (5.36% GC)
  maximum time:     2.891 ms (95.37% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [25]:
@benchmark J(styblinskiTang,1,[rand(100,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  143.59 KiB
  allocs estimate:  3738
  --------------
  minimum time:     138.060 μs (0.00% GC)
  median time:      141.117 μs (0.00% GC)
  mean time:        152.691 μs (5.56% GC)
  maximum time:     3.061 ms (94.32% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [26]:
@benchmark J(styblinskiTang,1,[rand(500,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  696.16 KiB
  allocs estimate:  18540
  --------------
  minimum time:     691.687 μs (0.00% GC)
  median time:      708.943 μs (0.00% GC)
  mean time:        757.079 μs (5.59% GC)
  maximum time:     3.949 ms (81.65% GC)
  --------------
  samples:          6583
  evals/sample:     1

In [27]:
@benchmark J(styblinskiTang,1,[rand(1000,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  1.37 MiB
  allocs estimate:  37531
  --------------
  minimum time:     1.395 ms (0.00% GC)
  median time:      1.445 ms (0.00% GC)
  mean time:        1.546 ms (5.72% GC)
  maximum time:     4.899 ms (63.44% GC)
  --------------
  samples:          3228
  evals/sample:     1

In [28]:
@benchmark J(styblinskiTang,1,[rand(5000,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  6.81 MiB
  allocs estimate:  189538
  --------------
  minimum time:     7.161 ms (0.00% GC)
  median time:      7.641 ms (0.00% GC)
  mean time:        8.278 ms (7.27% GC)
  maximum time:     13.249 ms (41.73% GC)
  --------------
  samples:          604
  evals/sample:     1

In [29]:
@benchmark J(styblinskiTang,1,[rand(10000,1)...]) 

BenchmarkTools.Trial: 
  memory estimate:  13.62 MiB
  allocs estimate:  379539
  --------------
  minimum time:     14.961 ms (0.00% GC)
  median time:      15.786 ms (0.00% GC)
  mean time:        17.139 ms (9.19% GC)
  maximum time:     22.801 ms (29.64% GC)
  --------------
  samples:          292
  evals/sample:     1

## Wykres 

In [29]:
x = [5,50,100,500,1000,5000,10000]
y_fw_rosenbrok = [3.316/1000, 129.016/1000,  493.868/1000,  11.237, 44.794, 1.115*1000,  4.808*1000]
y_fw_styblinskiTang = [3.472/1000,90.906/1000,358.546/1000,7.804,29.963,760.267,3.124*1000]
y_bw_rosenbrok = [7.198/1000,80.930/1000,161.235/1000,821.326/1000,1.668,8.805,18.512]
y_bw_styblinskiTang = [9.339/1000,76.601/1000,152.691/1000,757.079/1000,1.546,8.278,17.139]

plot(x, y_fw_rosenbrok,label = "Funkcja Rosenbrocka", lw = 3, legend=:topleft)
plot!(x, y_fw_styblinskiTang, label = "Funkcja Styblinski-Tang", lw = 3)
xlabel!("Liczba zmiennych")
ylabel!("Czas obliczeń [ms]")

In [30]:
plot(x, y_bw_rosenbrok,label = "Funkcja Rosenbrocka", lw = 3, legend=:topleft)
plot!(x, y_bw_styblinskiTang, label = "Funkcja Styblinski-Tang", lw = 3)
xlabel!("Liczba zmiennych")
ylabel!("Czas obliczeń [ms]")

In [51]:
plot(x, log.(y_fw_styblinskiTang),label = "Różniczkowanie w przód", lw = 3, legend=:topleft)
plot!(x, log.(y_bw_styblinskiTang), label = "Różniczkowanie w tył", lw = 3)
plot!(title="Średni czas obliczeń funkcji Styblinski-Tang")

## Testy funkcji wektorowych

In [23]:
J(TestFunction5,5, [5.3,3.3])

5-element Vector{Matrix{Float64}}:
 [3.13652106757129e13; 2.1369922567008554e11]
 [4.712093101741395e169; 1.140214592274678]
 [2.0865546881500618e25; 0.027451499719419292]
 [42.4; 4.086653233042158]
 [43.4; -1.5408631772268135]

In [17]:
@benchmark J(TestFunction1,1, [rand(2,1)...])

BenchmarkTools.Trial: 
  memory estimate:  2.97 KiB
  allocs estimate:  69
  --------------
  minimum time:     3.075 μs (0.00% GC)
  median time:      3.253 μs (0.00% GC)
  mean time:        3.523 μs (5.40% GC)
  maximum time:     394.993 μs (99.09% GC)
  --------------
  samples:          10000
  evals/sample:     8

In [19]:
@benchmark J(TestFunction2,2, [rand(2,1)...])

BenchmarkTools.Trial: 
  memory estimate:  10.36 KiB
  allocs estimate:  229
  --------------
  minimum time:     7.583 μs (0.00% GC)
  median time:      7.792 μs (0.00% GC)
  mean time:        8.662 μs (7.34% GC)
  maximum time:     754.966 μs (98.50% GC)
  --------------
  samples:          10000
  evals/sample:     4

In [18]:
@benchmark J(viennet,3, [rand(2,1)...])

BenchmarkTools.Trial: 
  memory estimate:  16.83 KiB
  allocs estimate:  365
  --------------
  minimum time:     9.258 μs (0.00% GC)
  median time:      9.720 μs (0.00% GC)
  mean time:        11.033 μs (7.46% GC)
  maximum time:     2.863 ms (99.45% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [20]:
@benchmark J(TestFunction4,4, [rand(2,1)...])

BenchmarkTools.Trial: 
  memory estimate:  22.30 KiB
  allocs estimate:  499
  --------------
  minimum time:     15.259 μs (0.00% GC)
  median time:      15.968 μs (0.00% GC)
  mean time:        17.940 μs (7.66% GC)
  maximum time:     2.953 ms (99.06% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [21]:
@benchmark J(TestFunction5,5, [rand(2,1)...])

BenchmarkTools.Trial: 
  memory estimate:  38.38 KiB
  allocs estimate:  785
  --------------
  minimum time:     20.006 μs (0.00% GC)
  median time:      20.884 μs (0.00% GC)
  mean time:        23.805 μs (9.35% GC)
  maximum time:     3.053 ms (98.94% GC)
  --------------
  samples:          10000
  evals/sample:     1

## Wykresy

In [3]:
x = [1,2,3,4,5]
y_fw = [2.528, 3.825,  2.639,  8.544, 5.611]
y_bw = [3.523,8.662,11.033,17.940,23.805]
plot(x, y_fw,label = "Różniczkowanie w przód", lw = 3, legend=:topleft)
plot!(x, y_bw, label = "Różniczkowanie w tył", lw = 3)
xlabel!("Liczba funkcji składowych")
ylabel!("Czas obliczeń [μs]")