In [1]:
import Random
# Shared random-number generator used by every constructor in this notebook.
# No seed is passed here, so the sampled values printed below are presumably
# not reproducible across sessions; pass an integer seed to MersenneTwister to pin them.
rng = Random.MersenneTwister()

import ReverseDiff as rd
import ForwardDiff as fd
import LinearAlgebra as la

In [2]:
"""
    Particles(positions, dims, num)
    Particles(dims, num, rng::Random.AbstractRNG)

Flat container for particle coordinates.

# Fields
- `positions`: coordinate vector; the random constructor gives it `dims * num` entries.
- `dims`: value passed as `dims` at construction.
- `num`: value passed as `num` at construction.
"""
struct Particles
    positions::Vector{Float64}
    dims::Int64
    num::Int64
end

# The random constructor dispatches on `rng::Random.AbstractRNG` on purpose.  An untyped
# three-argument method would replace the struct's auto-generated converting constructor
# `Particles(::Any, ::Any, ::Any)`, so a call with non-Int64 sizes (e.g. Int32) would
# re-enter the random constructor with shifted arguments and fail with a MethodError.
function Particles(dims, num, rng::Random.AbstractRNG)
    (dims >= 0 && num >= 0) ||
        throw(ArgumentError("dims and num must be non-negative, got dims=$dims, num=$num"))
    # One scalar draw per coordinate, in order, each mapped from rand's output u
    # to 2 * (u - 0.5).
    positions = [2 * (Random.rand(rng, Float64) - 0.5) for _ in 1:(dims * num)]
    return Particles(positions, dims, num)
end

Particles

In [3]:
# Sizes handed to the Particles constructor; the position vector gets dims * num entries.
# dist (defined below) reads indices 1 through 4, which matches dims * num = 4.
dims = 2
num = 2

2

In [4]:
# Sample dims * num random coordinates using the shared rng.
particles = Particles(dims, num, rng)

Particles([0.9984949986841052, -0.40444724035160684, -0.9127128025760629, -0.68809116042693], 2, 2)

In [5]:
"""
    dist(p)

Return the reciprocal of the Euclidean distance between the points `(p[1], p[2])` and
`(p[3], p[4])`.

Despite the name, the result is an inverse distance.  `1e-6` is added to the distance
before inverting, so coincident points give a large finite value instead of `Inf`.
Only the first four entries of `p` are read.
"""
function dist(p)
    Δx = p[1] - p[3]
    Δy = p[2] - p[4]
    separation = sqrt(Δx^2 + Δy^2)
    return 1.0 / (separation + 1e-6)
end
# Evaluate the inverse-distance feature on the sampled configuration.
dist(particles.positions)

0.5175602834977463

In [16]:
"""
    RBMDist(visible, hidden, σ, rng::Random.AbstractRNG)

Restricted-Boltzmann-machine parameters together with preallocated buffers used when
differentiating `evaluate` twice.

`W`, `a` and `b` are filled with `Random.rand(rng, Float64, ...)`.  The differentiation
buffers are sized for `visible - 1` inputs: `evaluate` takes the first `M - 1` visible
values from its input vector and uses `dist(X)` for the last one.

The type parameter `T` is the concrete type of the ForwardDiff Jacobian configuration.
"""
struct RBMDist{T}
    # Normal rbm stuff
    M::Int64 # Number of visible units
    N::Int64 # Number of hidden units
    W::Matrix{Float64} # Weights, size (M, N)
    a::Vector{Float64} # Visible bias
    b::Vector{Float64} # Hidden bias
    σ::Float64

    # Hessian setup
    hes_grad_result::Vector{Float64} # Gradient output buffer, length M - 1
    hes_grad_tapes::Dict{DataType, Any} # Cache filled by hes_grad!, keyed by input eltype
    hes_jac_result::Matrix{Float64} # Jacobian-of-gradient buffer, size (M - 1, M - 1)
    hes_jac_config::T

    function RBMDist(visible, hidden, σ, rng::Random.AbstractRNG)
        W = Random.rand(rng, Float64, (visible, hidden))
        a = Random.rand(rng, Float64, visible)
        b = Random.rand(rng, Float64, hidden)

        # Number of differentiated inputs (the last visible unit is not an input).
        n = visible - 1

        hes_grad_result = zeros(n)
        hes_grad_tapes = Dict{DataType, Any}()
        hes_jac_result = zeros(n, n)
        # `nothing` is passed in the function slot, so the config is not tied to one
        # particular closure; the same buffer stands in for both output and input shape.
        hes_jac_config = fd.JacobianConfig(nothing, hes_grad_result, hes_grad_result)

        return new{typeof(hes_jac_config)}(visible, hidden, W, a, b, σ,
                                           hes_grad_result, hes_grad_tapes, hes_jac_result,
                                           hes_jac_config)
    end
end

In [76]:
# One visible unit per coordinate plus one extra unit (evaluate feeds dist(X) into the
# last visible unit); 4 hidden units and σ = 1.
rbm = RBMDist(dims * num + 1, 4, 1, rng);

In [77]:
"""
    evaluate(X, wf::RBMDist)

Evaluate the RBM expression for the configuration `X`.

With visible values `v = (X[1], ..., X[M-1], dist(X))` the result is

    exp(-Σ_i (v_i - a_i)^2 / (2σ^2)) * Π_j (1 + exp(b_j + Σ_i v_i W[i, j] / σ^2))

where `i` runs over the `wf.M` visible units and `j` over the `wf.N` hidden units.
"""
function evaluate(X, wf::RBMDist)
    ncoord = wf.M - 1      # visible units read directly from X
    σsq = wf.σ^2
    r = dist(X)            # value fed to the last visible unit

    # Gaussian factor: squared deviation of every visible value from its bias.
    quad = 0.0
    for k in 1:ncoord
        quad += (X[k] - wf.a[k])^2
    end
    quad += (r - wf.a[end])^2
    value = exp(-quad / (2.0 * σsq))

    # One factor per hidden unit.
    for h in 1:wf.N
        act = 0.0
        for k in 1:ncoord
            act += X[k] * wf.W[k, h]
        end
        act += r * wf.W[end, h]
        value *= 1.0 + exp(wf.b[h] + act / σsq)
    end
    return value
end

evaluate (generic function with 1 method)

In [78]:
# The reported time is dominated by compilation (see the compilation-time percentage
# in the output below), not by the evaluation itself.
@time evaluate(particles.positions, rbm)

  0.023505 seconds (38.16 k allocations: 2.241 MiB, 99.92% compilation time)


4.683236311485086

In [79]:
"""
    hes_grad!(y, x::Array{T}, wf::RBMDist) where {T<:Real}

Write the gradient of `z -> evaluate(z, wf)` at `x` into `y` and return the result of
`rd.gradient!`.

A compiled ReverseDiff tape is built on first use for each element type `T` and cached
in `wf.hes_grad_tapes`; later calls with the same `T` replay the cached tape.

NOTE(review): the tape is recorded once per `T`, so values read from `wf` during
recording are presumably frozen into it; confirm before mutating `wf.W`, `wf.a` or
`wf.b` after the first call.
"""
function hes_grad!(y, x::Array{T}, wf::RBMDist) where {T<:Real}
    compiled = get!(wf.hes_grad_tapes, T) do
        cfg = rd.GradientConfig(x)
        rd.compile(rd.GradientTape(z -> evaluate(z, wf), x, cfg))
    end
    return rd.gradient!(y, compiled, x)
end

"""
    kinetic(particles, wf::RBMDist)

Return `-0.5` times the trace of the Jacobian of `hes_grad!`, taken at
`particles.positions`.  That is, `-0.5` times the trace of the Hessian of `evaluate`.

The Jacobian is written into `wf.hes_jac_result`, and `wf.hes_grad_result` is used as
the output buffer for the gradient; both are overwritten on every call.

NOTE(review): the result is not divided by the value of `evaluate`; confirm whether a
normalized local kinetic energy is intended.
"""
function kinetic(particles, wf::RBMDist)
    grad! = (out, pos) -> hes_grad!(out, pos, wf)
    fd.jacobian!(wf.hes_jac_result, grad!, wf.hes_grad_result, particles.positions,
                 wf.hes_jac_config)
    return -0.5 * la.tr(wf.hes_jac_result)
end

kinetic (generic function with 1 method)

In [82]:
# Presumably a repeated call: the cell counter jumps from 79 to 82 and the output below
# reports no compilation time. TODO confirm.
@time kinetic(particles, rbm)

  0.000023 seconds (1 allocation: 16 bytes)


-55.78843944615796