In [1]:
import Base: iterate, exp, log, sin, cos, tan, +, ^, -, *, /, sqrt, convert, promote_rule, zero,max, isless


In [2]:
# This is the structure that stores the autodiff information.  The name is
# `Infinitesimal` spelled backward.  The elements are `x`, the result of a
# computation; `dfdy` a storage location where the derivative of some overall
# computation (`f(x)`) with respect to the output of this element is stored;
# `parent` storing the "parent" locations that have been used to produce the
# result `x`; and `bp!` a function that propogates the derivative `dfdy` to the
# parents via `dfdx = dydx * dfdy` where `dydx` is the derivative of this piece
# of the computation, `dfdy` is the derivative of the overall function with
# respect to the output, and `dfdx` is the same for the next level up.
mutable struct Dual{T <: Number} <: Number
    x::T
    dfdy::T
    parent::Union{Dual{T}, Array{Dual{T},1}, Int, Nothing}
    bp!
end
# Dual(n::Integer, d::Float64, parent::Union{Dual{T}, Array{Dual{T},1}, Int, Nothing}, bp!::Any)  where {T <: Number} = Dual(promote(n, d)..., parent, bp!)
# Dual(n::Float64, d::Integer, parent::Union{Dual{T}, Array{Dual{T},1}, Int, Nothing}, bp!::Any)  where {T <: Number} = Dual(promote(n, d)..., parent, bp!)


In [3]:
# x = Dual(2.0, 2, 1, (dfdy, parents) -> nothing)*  Dual(2, 2, 1, (dfdy, parents) -> nothing)
promote_type(Dual{Real}, Dual{Number})

Dual

In [4]:
function convert(::Type{Dual{T}}, x::T) where T <: Number
    Dual(x, zero(T), nothing, (dfdy, parents) -> nothing)
end

# function convert(::Type{Dual{T}}, x::Dual{S}) where {T, S <: Number}
#     Dual(T(x.x), T(x.dfdy), x.parent, x.bp!)
# end


function convert(::Type{Dual{T}}, x::S) where {T, S <: Number}
    print("2asdsadsda", typeof(x))
    Dual(T(x), zero(T), nothing, (dfdy, parents) -> nothing)
end

function convert(::Type{Dual{T}}, x::Dual{T}) where T
    x
end


convert (generic function with 186 methods)

In [5]:
function zero(x::Dual{T}) where T
    Dual(zero(T), zero(T), nothing, (dfdy, parents) -> nothing)
end

zero (generic function with 23 methods)

In [6]:
function promote_rule(::Type{Dual{T}}, ::Type{Dual{S}}) where {T,S}
    Dual{promote_type(T,S)}
end
function promote_rule(::Type{Dual{T}}, ::Type{S}) where {T, S <: Number}
    Dual{promote_type(T,S)}
end

function promote_rule(::Type{T}, ::Type{Dual{S}}) where {T <: Number, S}
    Dual{promote_type(T,S)}
end

function promote_rule(::Type{S}, ::Type{Dual{T}}) where {S <: AbstractIrrational, T}
    Dual{promote_type(S, T)}
end


promote_rule (generic function with 126 methods)

In [7]:
function push_parents!(queue::Array{Dual{T}, 1}, ::Nothing) where T
    # Do nothing
end
function push_parents!(queue::Array{Dual{T}, 1}, i::Int) where T
    # Do nothing
end
function push_parents!(queue::Array{Dual{T}, 1}, ls::Array{Dual{T}, 1}) where T
    append!(queue, ls)
end
function push_parents!(queue::Array{Dual{T}, 1}, l::Dual{T}) where T
    push!(queue, l)
end

push_parents! (generic function with 4 methods)

In [8]:
function backprop!(l::Dual{T}) where T
    # Apparently we need this construction because otherwise l gets copied when
    # put into the array.
    backprop!([l])
end
function backprop!(queue::Array{Dual{T},1}) where T
    while length(queue) > 0
        l = popfirst!(queue)
        l.bp!(l.dfdy, l.parent)
        push_parents!(queue, l.parent)
    end
end

backprop! (generic function with 2 methods)

In [9]:
function collect_outputs(l::Dual{T}) where T
    queue = Dual{T}[l]

    outputs = Dual{T}[]

    while length(queue) > 0
        l = popfirst!(queue)
        if typeof(l.parent) <: Int
            push!(outputs, l)
        elseif typeof(l.parent) == Dual{T}
            push!(queue, l.parent)
        elseif typeof(l.parent) == Array{Dual{T}, 1}
            append!(queue, l.parent)
        else # Nothing
            # Do nothing
        end
    end

    outputs
end


collect_outputs (generic function with 1 method)

In [10]:
"""    D([i], f)

Returns a function that computes the derivative of `f` (if it is
single-argument) or the gradient of `f` (if it is multi-argument or takes a
structured argument).  If `i` is given returns the `i`th component of the
gradient (though this does not reduce the cost with backprop).

Currently works only for `f` with scalar outputs.  Also, note that the autodiff
will fail unless the output type is identical to the input type (the code
automatically converts constants and non-differentiated expressions to the
appropriate type---only the input and output type of the function needs to
match).

So, for example, `D(cos)(3)` will fail (because the output type is `Float64` not
`Int`), while `D(cos)(3.0)` will work fine and return `-sin(3.0)`.

"""
function D(f)
    function dfdx(x::T) where T <: Number
        # Pass the function a backward infinitesimal whose backprop function
        # stores the backprop derivative in dfdx_store
        print("x::T")

        x = Dual(x, zero(x), 1, (dfdy, parents) -> nothing)

        result = f(x)

        result.dfdy = one(result.x)
        backprop!(result)

        y = collect_outputs(result)[1]

        return y.dfdy
    end

    function dfdx(x::Array{T}, i) where T <: Number
        print("array")
        fargs = [Dual(xelt, zero(xelt), i, (dfdy, parents) -> nothing) for (i, xelt) in enumerate(x)]
        result = f(fargs)[i]
        result.dfdy = one(result.x)
        backprop!(result)
        y = collect_outputs(result)
        grad = zeros(typeof(result.x), length(x))
        for yelt in y
            grad[yelt.parent] = yelt.dfdy
        end

        return grad
    end

    function dfdx(x...)
        print("x...")

        fargs = [Dual(xelt, zero(xelt), i, (dfdy, parents) -> nothing) for (i, xelt) in enumerate(x)]
        result = f(fargs...)
        result.dfdy = one(result.x)
        backprop!(result)
        y = collect_outputs(result)

        grad = zeros(typeof(result.x), length(x))
        for yelt in y
            grad[yelt.parent] = yelt.dfdy
        end

        return grad
    end

    return dfdx
end

D

In [11]:
function D(i::Integer, f)
    df = D(f)
    function df_wrapper(x...)
        g = df(x...)
        return g[i]
    end
    return df_wrapper
end

D (generic function with 2 methods)

In [12]:
function bpp!(dfdy, xy)
    x, y = xy
    x.dfdy += dfdy
    y.dfdy += dfdy
end

function +(x::Dual{T}, y::Dual{T}) where T
    Dual(x.x + y.x, zero(T), [x, y], bpp!)
end

function bpm!(dfdy, xy)
    x, y = xy
    x.dfdy += dfdy
    y.dfdy -= dfdy
end
function -(x::Dual{T}, y::Dual{T}) where T
    Dual(x.x - y.x, zero(T), [x, y], bpm!)
end

function bpum!(dfdy, x)
    x.dfdy -= dfdy
end
function -(x::Dual{T}) where T
    Dual(-x.x, zero(T), x, bpum!)
end

function bpt!(dfdy, xy)
    x,y = xy
    x.dfdy += dfdy*y.x
    y.dfdy += x.x*dfdy
end
function *(x::Dual{T}, y::Dual{T}) where T
    Dual(x.x*y.x, zero(T), [x,y], bpt!)
end

function /(x::Dual{T}, y::Dual{T}) where T
    yinv = one(T)/y.x

    function bp!(dfdy, xy)
        a,b = xy
        a.dfdy += dfdy*yinv
        b.dfdy -= a.x*dfdy*(yinv*yinv)
    end

    Dual(x.x*yinv, zero(T), [x,y], bp!)
end

function exp(x::Dual{T}) where T
    expx = exp(x.x)

    function bp!(dfdy, p)
        p.dfdy += dfdy*expx
    end

    Dual(expx, zero(expx), x, bp!)
end

function exp(xs::Array)
    print(xs)
    return [exp(x) for x in xs]
end

function sin(x::Dual{T}) where T
    function bp!(dfdy, p)
        p.dfdy += cos(x.x)*dfdy
    end

    sx = sin(x.x)
    Dual(sx, zero(sx), x, bp!)
end

function cos(x::Dual{T}) where T
    function bp!(dfdy, p)
        p.dfdy -= sin(x.x)*dfdy
    end

    cx = cos(x.x)
    Dual(cx, zero(cx), x, bp!)
end

function tan(x::Dual{T}) where T
    c = cos(x.x)
    function bp!(dfdy, p)
        p.dfdy += dfdy/(c*c)
    end

    tx = tan(x.x)
    Dual(tx, zero(tx), x, bp!)
end

function sqrt(x::Dual{T}) where T
    sqrtx = sqrt(x.x)

    function bp!(dfdy, p)
        p.dfdy += dfdy/(2*sqrtx)
    end

    Dual(sqrtx, zero(sqrtx), x, bp!)
end

function ^(a::Dual{T}, x::Dual{T}) where T
    value = a.x^x.x
    function bp!(dfdy, xy)
        a,x = xy
#          print(typeof(dfdy * (x.x) * (a.x)^(x.x - 1)), " nic  ", typeof(dfdy * a.x^x.x*log(a.x)))
        a.dfdy += dfdy * (x.x) * (a.x)^(x.x - 1)
        x.dfdy += dfdy * a.x^x.x*log(a.x)
    end

    Dual(value, zero(value), [a, x], bp!)
end

function log(x::Dual{T}) where T
    function bp!(dfdy, p)
        p.dfdy += dfdy/x.x
    end
    Dual(log(x.x), zero(T), x, bp!)
end

function max(a, x::Dual{T}) where T
    function bp!(dfdy,p)
        p.dfdy += x.x < a ? a : 1 * dfdy
    end
    Dual(max(0, x.x), zero(T), x, bp!)
    

end


isless(x::Dual, y::Dual) = x.x < y.x;

In [40]:
function iterate(iter::Dual, state=1)
    if state > length(iter.x)
        return nothing
    end
    return (iter[state],state+1)
end

function softmax(vector::Array)
    e = exp(vector)    
    return e / sum(e)
end

softmax (generic function with 1 method)

In [14]:
J = function jacobian(f, number_of_functions, args::Vector{T}) where {T <:Number}
    jacobian_rows = Matrix{T}[]
    
    for i=1:number_of_functions
#         x = Dual{T}[]
#         for j=1:length(args)
#             seed = (i == j)
#             push!(x, seed ?
#                 Dual(args[j], one(args[j])) :
#                 Dual(args[j],zero(args[j])) )
#         end
#         temp  = [f(x)..]
        d = D(f)
        rows = d(args,i)
        push!(jacobian_rows, rows[:,:])
    end
    jacobian_rows
end

jacobian (generic function with 1 method)

In [15]:
function test(a, x)
    return a*x
end

test (generic function with 1 method)

In [16]:
d = D(test)

(::var"#dfdx#12"{typeof(test)}) (generic function with 3 methods)

In [17]:
d(2, 3)

x...

2-element Array{Int64,1}:
 3
 2

In [22]:
# f(x::Vector) = [2x[1]*x[2], 3x[2]*x[3]^2]
f(x::Vector) = [x[1], 5*x[3], 4x[2]^2-2x[3], x[3]*sin(x[1])]


f (generic function with 1 method)

In [25]:
J(f,4,[1.,2., 3.]) 


array2asdsadsdaInt642asdsadsdaInt642asdsadsdaInt64array2asdsadsdaInt642asdsadsdaInt642asdsadsdaInt64array2asdsadsdaInt642asdsadsdaInt642asdsadsdaInt64array2asdsadsdaInt642asdsadsdaInt642asdsadsdaInt64

4-element Array{Array{Float64,2},1}:
 [1.0; 0.0; 0.0]
 [0.0; 0.0; 5.0]
 [0.0; 16.0; -2.0]
 [1.6209069176044193; 0.0; 0.8414709848078965]

In [44]:
J(softmax,2,[1., 2.]) 

arrayDual{Float64}[Dual{Float64}(1.0, 0.0, 1, var"#9#15"()), Dual{Float64}(2.0, 0.0, 2, var"#9#15"())]arrayDual{Float64}[Dual{Float64}(1.0, 0.0, 1, var"#9#15"()), Dual{Float64}(2.0, 0.0, 2, var"#9#15"())]

2-element Array{Array{Float64,2},1}:
 [0.46555335461147695; -0.19661193324148185]
 [-0.19661193324148188; 0.9276705118714866]

In [48]:
x = [Dual(1.0, 0.0, nothing, nothing), Dual(2.0, 0.0, nothing, nothing), Dual(3.0, 0.0, nothing, nothing)]
y = [Dual(2.0, 0.0, nothing, nothing), Dual(2.0, 0.0, nothing, nothing), Dual(2.0, 0.0, nothing, nothing)]


3-element Array{Dual{Float64},1}:
 Dual{Float64}(2.0, 0.0, nothing, nothing)
 Dual{Float64}(2.0, 0.0, nothing, nothing)
 Dual{Float64}(2.0, 0.0, nothing, nothing)

In [59]:
exp(x)/sum(x)


Dual{Float64}[Dual{Float64}(1.0, 0.0, nothing, nothing), Dual{Float64}(2.0, 0.0, nothing, nothing), Dual{Float64}(3.0, 0.0, nothing, nothing)]

3-element Array{Dual{Float64},1}:
  Dual{Float64}(0.45304697140984085, 0.0, Dual{Float64}[Dual{Float64}(2.718281828459045, 0.0, Dual{Float64}(1.0, 0.0, nothing, nothing), var"#bp!#38"{Float64}(2.718281828459045)), Dual{Float64}(6.0, 0.0, Dual{Float64}[Dual{Float64}(3.0, 0.0, Dual{Float64}[Dual{Float64}(1.0, 0.0, nothing, nothing), Dual{Float64}(2.0, 0.0, nothing, nothing)], bpp!), Dual{Float64}(3.0, 0.0, nothing, nothing)], bpp!)], var"#bp!#37"{Float64}(0.16666666666666666))
      Dual{Float64}(1.231509349821775, 0.0, Dual{Float64}[Dual{Float64}(7.38905609893065, 0.0, Dual{Float64}(2.0, 0.0, nothing, nothing), var"#bp!#38"{Float64}(7.38905609893065)), Dual{Float64}(6.0, 0.0, Dual{Float64}[Dual{Float64}(3.0, 0.0, Dual{Float64}[Dual{Float64}(1.0, 0.0, nothing, nothing), Dual{Float64}(2.0, 0.0, nothing, nothing)], bpp!), Dual{Float64}(3.0, 0.0, nothing, nothing)], bpp!)], var"#bp!#37"{Float64}(0.16666666666666666))
 Dual{Float64}(3.3475894871979444, 0.0, Dual{Float64}[Dual{Float64}(20.085

In [49]:
d = D(f)

(::var"#dfdx#67"{typeof(f)}) (generic function with 3 methods)

In [51]:
d([1,2,3],2)

arrayDual{Int64}[Dual{Int64}(18, 1, Dual{Int64}[Dual{Int64}(6, 0, Dual{Int64}[Dual{Int64}(3, 0, nothing, var"#1#2"()), Dual{Int64}(2, 0, 2, var"#64#70"())], bpt!), Dual{Int64}(3, 0, 3, var"#64#70"())], bpt!)]

3-element Array{Int64,1}:
 0
 9
 6

In [39]:
softmax([1,2,3])

3-element Array{Float64,1}:
 0.09003057317038046
 0.24472847105479767
 0.6652409557748219