In [1]:
# Zdefiniowanie struktury
struct Dual{T <:Number} <:Number
    v::T
   dv::T
end
# Przeciążenie podstawowych operatorów
import Base: +, -, *, /
-(x::Dual)          = Dual(-x.v,       -x.dv)
+(x::Dual, y::Dual) = Dual( x.v + y.v,  x.dv + y.dv)
-(x::Dual, y::Dual) = Dual( x.v - y.v,  x.dv - y.dv)
*(x::Dual, y::Dual) = Dual( x.v * y.v,  x.dv * y.v + x.v * y.dv)
/(x::Dual, y::Dual) = Dual( x.v / y.v, (x.dv * y.v - x.v * y.dv)/y.v^2)
# Przeciążenie podstawowych funkcji
import Base: abs, sin, cos, tan, exp, sqrt, isless
abs(x::Dual)  = Dual(abs(x.v),sign(x.v)*x.dv)
sin(x::Dual)  = Dual(sin(x.v), cos(x.v)*x.dv)
cos(x::Dual)  = Dual(cos(x.v),-sin(x.v)*x.dv)
tan(x::Dual)  = Dual(tan(x.v), one(x.v)*x.dv + tan(x.v)^2*x.dv)
exp(x::Dual)  = Dual(exp(x.v), exp(x.v)*x.dv)
sqrt(x::Dual) = Dual(sqrt(x.v),.5/sqrt(x.v) * x.dv)
isless(x::Dual, y::Dual) = x.v < y.v;
# Promocja typów i konwersja
import Base: convert, promote_rule
convert(::Type{Dual{T}}, x::Dual) where T = Dual(convert(T, x.v), convert(T, x.dv))
@show Dual{Float64}[Dual(1,2), Dual(3,0)];
convert(::Type{Dual{T}}, x::Number) where T = Dual(convert(T, x), zero(T))
@show Dual{Float64}[1, 2, 3];
promote_rule(::Type{Dual{T}}, ::Type{R}) where {T,R} = Dual{promote_type(T,R)}
@show Dual(1,2) * 3;
# Pomocne funkcje
import Base: show
show(io::IO, x::Dual) = print(io, "(", x.v, ") + [", x.dv, "ϵ]");
value(x::Dual) = x.v;
partials(x::Dual) = x.dv;
ReLU(x) = max(zero(x), x)
σ(x) = one(x) / (one(x) + exp(-x))
tanh(x) = 2.0 / (one(x) + exp(-2.0x)) - one(x)
ϵ = Dual(0., 1.)
D = derivative(f, x) = partials(f(Dual(x, one(x))))
J = function jacobian(f, args::Vector{T}) where {T <:Number}
    jacobian_columns = Matrix{T}[]
    
    for i=1:length(args)
        x = Dual{T}[]
        for j=1:length(args)
            if i == j
                push!(x, Dual(args[j], one(args[j])))
            else
                push!(x, Dual(args[j], zero(args[j])))
            end
        end
        column = partials.([f(x)...])
        push!(jacobian_columns, column[:,:])
    end
    hcat(jacobian_columns...)
end

H = function hessian(f, args::Vector)
    ∇f(x::Vector) = J(f, x)
    J(∇f, args)
end


Dual{Float64}[Dual(1, 2), Dual(3, 0)] = Dual{Float64}[Dual{Float64}(1.0, 2.0), Dual{Float64}(3.0, 0.0)]
Dual{Float64}[1, 2, 3] = Dual{Float64}[Dual{Float64}(1.0, 0.0), Dual{Float64}(2.0, 0.0), Dual{Float64}(3.0, 0.0)]
Dual(1, 2) * 3 = Dual{Int64}(3, 6)


hessian (generic function with 1 method)

In [5]:
mean_squared_loss(y::Vector, ŷ::Vector) = sum(0.5(y - ŷ).^2)
fullyconnected(w::Vector, n::Number, m::Number, v::Vector, activation::Function) = activation.(reshape(w, n, m) * v)
n1 = 6
n2 = 6
Wh  = randn(n1,2)
Wh2 = randn(n2,n1)
Wo  = randn(1,n2)
dWh = similar(Wh)
dWh2 = similar(Wh2)
dWo = similar(Wo)
x = [1.98;4.434]
y = [0.064]
E = Float64[]

function net(x, wh, wh2, wo, y)
    println(wh[1])
    x̂ = fullyconnected(wh, n1, 2, x, σ)
    x2 = fullyconnected(wh2, n2, n1, x̂, σ)
    ŷ = fullyconnected(wo, 1, n2, x2, u -> u)
    println(y, ŷ)
    E = mean_squared_loss(y, ŷ)
end
Ei = net(x, Wh[:], Wh2[:], Wo[:], y)

0.8804369269715518
[0.064][1.1826662488850403]


0.6257070881972635

In [8]:
0.064 - Dual(1.1826662488850403, -0.006851312753841444)

(-1.1186662488850403) + [0.006851312753841444ϵ]

In [6]:
dnet_Wh(x, wh, wh2, wo, y) = J(w -> net(x, w, wh2, wo, y), wh);
dWh[:] = dnet_Wh(x, Wh[:], Wh2[:], Wo[:], y);

dnet_Wh2(x, wh, wh2, wo, y) = J(w -> net(x, wh, w, wo, y), wh2);
dWh2[:] = dnet_Wh2(x, Wh[:], Wh2[:], Wo[:], y);

dnet_Wo(x, wh, wh2, wo, y) = J(w -> net(x, wh, wh2, w, y), wo);
dWo[:] = dnet_Wo(x, Wh[:], Wh2[:], Wo[:], y);
for i=1:2
    push!(E, Ei)
    println(Ei)
    Wh .-= 0.04dWh
    Wh2 .-= 0.04dWh2
    Wo .-= 0.04dWo
    Ei  = net(x, Wh[:], Wh2[:], Wo[:], y)
end

(0.8804369269715518) + [1.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [-0.006851312753841444ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [4.10838852587308e-5ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [0.0063187308009521725ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [-0.042421940886825604ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [0.0013880114189563978ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [0.002706211227409685ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [-0.015342788257844932ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [9.200300365515778e-5ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [0.014150127460314106ϵ]]
(0.8804369269715518) + [0.0ϵ]
[0.064]Dual{Float64}[(1.1826662488850403) + [-0.09499943731928523ϵ]]
(0.880

In [61]:
E

10-element Vector{Float64}:
 2.166096941040962
 0.4395299389813892
 0.006706030996402699
 0.17921554984867658
 0.8210407909483454
 1.8833566986082655
 3.314463769576946
 5.06155014315852
 7.078233414956745
 9.328269218741724