# Structure Definition

In [33]:
"""
    Node

Supertype of every element of the computation graph.
"""
abstract type Node end

"""
    LeafNode <: Node

Supertype of the leaf elements of the graph (see `Variable`).
"""
abstract type LeafNode <: Node end

"""
    Variable(val)
    Variable(val, grad)

Leaf of the computation graph holding a `value` and the gradient accumulated
for it in `grad`. Both fields share the type `T`.

The one-argument constructor starts the gradient at `zero(val)`; the
two-argument constructor stores the given `grad` unchanged.
"""
mutable struct Variable{T} <: LeafNode
    value::T
    grad::T
    # The accumulator must start at zero rather than at a value-dependent
    # number: `backward` adds every incoming contribution to `grad`, so any
    # other start value ends up in the reported derivative.
    Variable(val::T) where T = new{T}(val, zero(val))
    Variable(val::T, grad::T) where T = new{T}(val, grad)
end


"""
    ComputableNode(operation, attribute)

Graph node that pairs an operation with the arguments it is applied to.
`register` stores the operation in `operation` and the tuple of its arguments
in `attribute`.
"""
struct ComputableNode{TOperation, TAttributes} <: Node 
    operation::TOperation
    attribute::TAttributes
end

"""
    CachedNode(node, output)

Wraps a graph node together with the result of evaluating it. The struct is
mutable because `forward(::CachedNode)` overwrites `output` with a freshly
computed result.
"""
mutable struct CachedNode{TNode, TOutput} <: Node
    node::TNode
    output::TOutput
end

Main.RDStructure

# Creating Nodes functions

In [2]:
"""
    register(op, args...)

Record the application of `op` to `args` in the graph: build the
`ComputableNode`, evaluate it once with `forward`, and return a `CachedNode`
holding both the node and its result.
"""
function register(op, args...)
    computable = ComputableNode(op, args)
    result = forward(computable)
    return CachedNode(computable, result)
end

import Base: +, -, *, /, sin,cos,tan,exp
import Base: zero, isless
import Base: convert, promote_rule
# Arithmetic on graph nodes: each binary operator applied to two nodes is
# recorded in the graph through `register`.
for op in (:+, :-, :*, :/)
    @eval $op(x::Node, y::Node) = register($op, x, y)
end

# Elementary functions of one node are recorded the same way.
for op in (:sin, :cos, :tan, :exp)
    @eval $op(x::Node) = register($op, x)
end

# Re-evaluate the wrapped node, store the fresh result in the cache and
# return it.
function forward(cached::CachedNode)
    result = forward(cached.node)
    cached.output = result
    return result
end

# Evaluate every argument first, then apply the operation to the results.
function forward(node::ComputableNode)
    inputs = map(forward, node.attribute)
    return forward(node.operation, inputs...)
end

# A plain function is simply called on the already evaluated arguments.
function forward(op::Function, args...)
    return op(args...)
end

# A variable evaluates to the value it stores.
function forward(var::Variable)
    return var.value
end

# Accessor for the value stored in a variable.
function value(x::Variable)
    return x.value
end



# Additive identity of a variable: zero value and zero gradient, both of the
# same type as the fields of `x`.
zero(x::Variable) = Variable(zero(x.value), zero(x.grad));

# Ordering is decided by the stored values only; gradients are ignored.
# The previous `Variable{any}` signature used the function `any` as a type
# parameter and therefore never matched a real variable.
isless(x::Variable, y::Variable) = x.value < y.value;
# Comparisons against plain real numbers, in both argument orders, so that
# `x > 0` and `x < 0` both work.
isless(x::Real, y::Variable) = x < y.value;
isless(x::Variable, y::Real) = x.value < y;

# Re-type a variable: value and gradient are both converted to `T`.
function convert(::Type{Variable{T}}, x::Variable) where T
    new_value = convert(T, x.value)
    new_grad = convert(T, x.grad)
    return Variable(new_value, new_grad)
end

# Lift a plain number into a variable of element type `T` with zero gradient.
function convert(::Type{Variable{T}}, x::Number) where T
    return Variable(convert(T, x), zero(T))
end

# Promotion of a variable type with another type widens the element type.
function promote_rule(::Type{Variable{T}}, ::Type{R}) where {T,R}
    return Variable{promote_type(T, R)}
end

# Evaluate the two-argument function `f` at `(xv, yv)`: both inputs are wrapped
# in `Variable`s and the plain value of the result is returned.
fval(f, xv, yv) = value(f(Variable(xv), Variable(yv)))
"""
    fgrad(f, xv, yv; scale=5e-4)

Evaluate `f` on `Variable`s wrapping `xv` and `yv`, run `backward` from the
result and return the two accumulated partial derivatives as a tuple, each
multiplied by `scale`.

# Keywords
- `scale`: factor applied to both partials before they are returned. The
  default `5e-4` reproduces the factor that used to be hard-coded here
  (presumably chosen for the quiver plot; confirm). Pass `scale=1` to obtain
  the unscaled gradient.
"""
function fgrad(f, xv, yv; scale=5e-4)
    # Start both accumulators at exactly zero so that only contributions from
    # `backward` end up in the result.
    x, y = Variable(xv, zero(xv)), Variable(yv, zero(yv))
    z = f(x, y)
    # Seed with a plain number: a seed wrapped in a `Variable` cannot be added
    # to a numeric accumulator when `f` returns one of its inputs directly.
    backward(z, 1.0)
    return scale * x.grad, scale * y.grad
end


# Evaluate the one-argument function `f` at `xv`: the input is wrapped in a
# `Variable` and the plain value of the result is returned.
fval(f, xv) = value(f(Variable(xv)))
"""
    fgrad(f, xv)

Evaluate `f` on a `Variable` wrapping `xv`, run `backward` from the result with
seed `1` and return the derivative accumulated in the input variable.
"""
function fgrad(f, xv)
    # Start the accumulator at exactly zero so the returned derivative holds
    # nothing but the contributions added by `backward`.
    x = Variable(xv, zero(xv))
    z = f(x)
    backward(z, 1)
    return x.grad
end

fgrad (generic function with 2 methods)

# Backward AD

In [3]:
"""
    backward(cached::CachedNode, grad)

Propagate `grad` through a cached operation: `gradient` yields one gradient per
input of the operation, and each of them is passed on to the matching input
node. Returns `nothing`.
"""
function backward(cached::CachedNode, grad::Any)
    input_grads = gradient(cached, grad)
    # Walk the inputs and their gradients in lockstep.
    foreach(backward, cached.node.attribute, input_grads)
    return nothing
end
# function backward(cached::CachedNode)
# end
# Mixed arithmetic between a Float64 variable and a plain Float64 works on the
# stored value and yields a plain number (no graph node is created).
+(x::Variable{Float64}, y::Float64) = value(x) + y
*(x::Variable{Float64}, y::Float64) = value(x) * y
# -(x::CachedNode{ComputableNode{typeof(+),Tuple{Variable{Float64},Variable{Float64}}},Float64}, y::Float64) =
#     -(value(x),y)
# *(x::CachedNode{ComputableNode{typeof(+),Tuple{Variable{Float64},Variable{Float64}}},Float64}, y::Float64) =
#     *(value(x), y)

# Gradient of a cached operation: look up the rule for the stored operation
# and call it with the plain values of the operation's inputs.
gradient(cached::CachedNode, grad) =
    gradient(cached.node.operation, grad, map(value, cached.node.attribute)...)

# Fallback for operations without a derivative rule. It used to call itself
# with the same arguments and recursed until the stack overflowed; it now
# reports the missing rule.
gradient(op::Function, grad, args...) =
    throw(ArgumentError("no gradient rule is defined for operation `$op`"))

# `value` unwraps a graph element to the plain data it carries.
value(cached::CachedNode) = value(cached.output)
value(var::Variable) = var.value
# Plain numbers are their own value (generalised from Float64 only).
value(var::Number) = var
# Derivative rules. `gradient(op, grad, inputs...)` returns a tuple with one
# entry per input: the incoming `grad` times the partial derivative of `op`
# with respect to that input, evaluated at the given input values.
#
# d(x + y) = (1, 1) and d(x - y) = (1, -1). The factors are written with `one`
# so that the result takes the promoted numeric type of `grad` and the input,
# and so that a seed wrapped in a `Variable{Float64}` collapses to a plain
# number through the `Variable * Float64` method defined in this file.
gradient(::typeof(+), grad, x, y) = (grad * one(x), grad * one(y))
gradient(::typeof(-), grad, x, y) = (grad * one(x), grad * -one(y))
# d(x / y) = (1 / y, -x / y^2)
gradient(::typeof(/), grad, x, y) = (grad / y, -1.0 * grad * x / y^2)
# d(x * y) = (y, x)
gradient(::typeof(*), grad, x, y) = (grad * y, grad * x)
# d sin(x) = cos(x)
gradient(::typeof(sin), grad, x) = (grad * cos(x), )
# d cos(x) = -sin(x)
gradient(::typeof(cos), grad, x) = (grad * -sin(x), )
# d tan(x) = 1 / cos(x)^2
gradient(::typeof(tan), grad, x) = (grad/(cos(x)*cos(x)), )
# d exp(x) = exp(x)
gradient(::typeof(exp), grad, x) = (grad * exp(x), )


"""
    backward(var::Variable, grad)

Add the incoming `grad` to the gradient accumulated in `var`. Returns
`nothing`.
"""
function backward(var::Variable, grad)
    # Both `Variable` constructors always set `grad`, so the field is never
    # undefined and the contribution can be accumulated unconditionally.
    var.grad += grad
    return nothing
end

backward (generic function with 2 methods)

# Test set

In [8]:
# Square grid for the two-argument examples: `xv` repeats every grid value
# `n` times in a row, `yv` repeats the whole grid `n` times.
v = -1:0.2:1
n = length(v)
xv = repeat(v; inner=n)
yv = repeat(v; outer=n)

# Sample points for the one-argument examples.
# NOTE(review): the global `range` shadows `Base.range` for the rest of the session.
x = -1.0:0.05:1.0
range = 0:(π / 360):(2 * π);
rangeTan = collect((-π / 2 + π / 180):(π / 180):(π / 2 - π / 180));

m = collect(1:0.5:10);

# Rosenbrock

In [None]:
"""
    rosenbrock(x, y)

Rosenbrock function `(1 - x)^2 + 100 * (y - x^2)^2` expressed with graph nodes.

Squares are written as products because only `+`, `-`, `*` and `/` are defined
for nodes in this file. The first term used to be `1 - x*x`, which is not the
Rosenbrock function.
"""
function rosenbrock(x, y)
    a = Variable(1.0) - x
    b = y - x * x
    return a * a + Variable(100.0) * b * b
end

In [None]:
# Function values and gradients of `rosenbrock` at every grid point.
zv = broadcast(fval, rosenbrock, xv, yv)
dz = broadcast(fgrad, rosenbrock, copy(xv), copy(yv))

# Plotting

In [None]:
using Plots
# Arrange the function values as an n-by-n matrix for the contour plot, then
# overlay the gradients as arrows.
zv = reshape(zv, (n, n))
contour(v, v, zv; fill=true)
quiver!(copy(xv), copy(yv); gradient=dz)

# ReLu

In [None]:
# Rectified linear unit on a variable: positive inputs pass through unchanged,
# everything else is replaced by `zero(x)`.
function ReLu(x::Variable)
    if x > 0
        return x
    else
        return zero(x)
    end
end

In [None]:
# Values and derivatives of `ReLu` at every sample point in `x`.
y = broadcast(fval, ReLu, x);
d = broadcast(fgrad, ReLu, x);

In [None]:
using Plots
# Derivative curve first, function values overlaid on the same axes.
plot(x, d; label="partials")
plot!(x, y; label="values")

# Sine

In [None]:
# Values and derivatives of `sin` at every sample point in `range`.
y = broadcast(fval, sin, range);
d = broadcast(fgrad, sin, range);

using Plots
# Derivative curve first, function values overlaid on the same axes.
plot(range, d; label="partials")
plot!(range, y; label="values")

# Cosine

In [None]:
# Values and derivatives of `cos` at every sample point in `range`.
y = broadcast(fval, cos, range);
d = broadcast(fgrad, cos, range)

using Plots
# Derivative curve first, function values overlaid on the same axes.
plot(range, d; label="partials")
plot!(range, y; label="values")

# Tangent

In [None]:
# Values and derivatives of `tan` at every sample point in `rangeTan`.
y = broadcast(fval, tan, rangeTan);
d = broadcast(fgrad, tan, rangeTan);

In [35]:
using Plots
# Derivative curve first, function values overlaid on the same axes.
plot(rangeTan, d; label="partials")
plot!(rangeTan, y; label="values")

BoundsError: BoundsError: attempt to access 179-element Array{Float64,1} at index [1:201]

# Softmax

In [34]:
"""
    softmax(arg)

Softmax of `arg`: `exp.(arg)` normalised so that the entries sum to one.

The maximum is subtracted before exponentiating. This leaves the result
mathematically unchanged but keeps `exp` from overflowing for large inputs.
Empty input yields an empty result.
"""
function softmax(arg)
    isempty(arg) && return exp.(arg)
    shifted = exp.(arg .- maximum(arg))
    return shifted ./ sum(shifted)
end;

A = collect(-10.0:0.1:10.0);
y = softmax(A);

# Diagonal of the softmax Jacobian, s_i * (1 - s_i), for every entry of `y`.
# (The loop used to read the undefined name `f_value` instead of `y`.)
d = y .* (1.0 .- y);

using Plots
# Derivative curve first, softmax values overlaid on the same axes.
plot(A, d; label="partials")
plot!(A, y; label="values")

UndefVarError: UndefVarError: plot not defined

# Jacobian

In [None]:
"""
    jacobian(f::Function, args::Vector{T}) where {T<:Number}

Jacobian of applying the scalar function `f` to every entry of `args`.

Each output depends only on the input with the same index, so the result is a
`length(args)`-by-`length(args)` matrix whose diagonal holds
`fgrad(f, args[i])` and whose other entries are zero. Empty input yields a
0-by-0 `Matrix{T}`.
"""
function jacobian(f::Function, args::Vector{T}) where {T <:Number}
    n = length(args)
    # `zeros(T, ...)` works for every numeric `T`; the former `0.0::T`
    # assertion threw for anything but Float64.
    jac = zeros(T, n, n)
    for i in eachindex(args)
        jac[i, i] = fgrad(f, args[i])
    end
    return jac
end

"""
    jacobian(f::Function, xargs::Vector{T}, yargs::Vector{T}) where {T<:Number}

Jacobians of applying the two-argument function `f` to the pairs
`(xargs[i], yargs[i])`.

Returns a tuple `(xjac, yjac)` of square matrices. Their diagonals hold the
first and second component of `fgrad(f, xargs[i], yargs[i])`; all other
entries are zero. The values are exactly what `fgrad` returns, including any
scaling `fgrad` applies.

Previously the concatenated matrices were computed and then discarded, so the
function returned vectors of single-column matrices instead.

# Throws
- `DimensionMismatch` if `xargs` and `yargs` differ in length.
"""
function jacobian(f::Function, xargs::Vector{T}, yargs::Vector{T}) where {T <:Number}
    if length(xargs) != length(yargs)
        throw(DimensionMismatch(
            "xargs has length $(length(xargs)), yargs has length $(length(yargs))"))
    end
    n = length(xargs)
    # `zeros(T, ...)` works for every numeric `T`; the former `0.0::T`
    # assertion threw for anything but Float64.
    xjac = zeros(T, n, n)
    yjac = zeros(T, n, n)
    for i in eachindex(xargs)
        xjac[i, i], yjac[i, i] = fgrad(f, xargs[i], yargs[i])
    end
    return xjac, yjac
end

In [None]:
    # Sample points for the one-argument examples, materialised as vectors
    # because `jacobian` accepts `Vector` arguments only.
    x = collect(-1.0:0.5:1);
    range = collect(0:π/360:2*π);
    rangeTan = collect(-π/2+π/180:π/180:π/2- π/180);

    # Grid for the two-argument Rosenbrock example.
    v = -1:0.2:+1
    n = length(v)
    xv = repeat(v, inner=n)
    yv = repeat(v, outer=n)

    display("Jacobi Rosenbrock")
    dx, dy = jacobian(rosenbrock, xv, yv)
    display(dx)

    # This section used to appear twice in a row; one copy is enough.
    display("Jacobi Relu")
    y = jacobian(ReLu, x);
    display(y)

    display("Jacobi Sin")
    y = jacobian(sin, range);
    display(y)

    display("Jacobi Cos")
    y = jacobian(cos, range);
    display(y)

    display("Jacobi Tan")
    y = jacobian(tan, rangeTan);
    display(y)

In [10]:
"""
    softmax(arg)

Softmax of `arg`: `exp.(arg)` normalised so that the entries sum to one.

The maximum is subtracted before exponentiating. This leaves the result
mathematically unchanged but keeps `exp` from overflowing for large inputs.
Empty input yields an empty result.
"""
function softmax(arg)
    isempty(arg) && return exp.(arg)
    shifted = exp.(arg .- maximum(arg))
    return shifted ./ sum(shifted)
end

"""
    dSoftmaxdx(x::Vector{T}) where {T <: Number}

Jacobian of `softmax` at `x`: entry `(i, j)` is `s[i] * (1 - s[i])` on the
diagonal and `-s[i] * s[j]` elsewhere, where `s = softmax(x)`.

The matrix takes its element type from the softmax output, so integer input
no longer fails when floating-point entries are stored.
"""
function dSoftmaxdx(x::Vector{T}) where {T <: Number}
    s = softmax(x)
    # Outer product gives every off-diagonal entry in one step.
    jac = -s .* s'
    for i in eachindex(s)
        jac[i, i] = s[i] * (1.0 - s[i])
    end
    return jac
end

# Softmax Jacobian evaluated at the sample vector `m`.
d = dSoftmaxdx(m)

19×19 Array{Float64,2}:
  4.85593e-5  -3.88806e-9  -6.41033e-9   …  -1.15902e-5   -1.91089e-5
 -3.88806e-9   8.00581e-5  -1.05689e-8      -1.91089e-5   -3.15053e-5
 -6.41033e-9  -1.05689e-8   0.000131987     -3.15053e-5   -5.19435e-5
 -1.05689e-8  -1.74251e-8  -2.87291e-8      -5.19435e-5   -8.56403e-5
 -1.74251e-8  -2.87291e-8  -4.73663e-8      -8.56403e-5   -0.000141197
 -2.87291e-8  -4.73663e-8  -7.80938e-8   …  -0.000141197  -0.000232794
 -4.73663e-8  -7.80938e-8  -1.28755e-7      -0.000232794  -0.000383813
 -7.80938e-8  -1.28755e-7  -2.12281e-7      -0.000383813  -0.000632801
 -1.28755e-7  -2.12281e-7  -3.49992e-7      -0.000632801  -0.00104331
 -2.12281e-7  -3.49992e-7  -5.7704e-7       -0.00104331   -0.00172013
 -3.49992e-7  -5.7704e-7   -9.51378e-7   …  -0.00172013   -0.00283602
 -5.7704e-7   -9.51378e-7  -1.56856e-6      -0.00283602   -0.0046758
 -9.51378e-7  -1.56856e-6  -2.58611e-6      -0.0046758    -0.00770909
 -1.56856e-6  -2.58611e-6  -4.26378e-6      -0.00770909   -0.01