## TODO

Preallocate memory and send it in?

Save the result of SimpleG in from the derivative calculation

## Setup of particles and imports

In [1]:
import ForwardDiff
import ReverseDiff
import DiffResults
import Zygote
import SparseDiffTools

import Random
rng = Random.MersenneTwister()
import StaticArrays as sa
import ArraysOfArrays
import LinearAlgebra as la

using BenchmarkTools: @btime

## The normal SimpleGaussian struct

In [2]:
struct SimpleGaussian{V}
    alpha::Float64
    HOshape::V
    HOshape2::V
    function SimpleGaussian(alpha, HOshape)
        HOshape = Vector{Float64}(HOshape)
        return new{typeof(HOshape)}(alpha, HOshape, HOshape.^2)
    end
end
wf = SimpleGaussian(0.5, [1, 1, 1])

SimpleGaussian{Vector{Float64}}(0.5, [1.0, 1.0, 1.0], [1.0, 1.0, 1.0])

## The function we take the derivative of for all of this

In [204]:
function SimpleGparam(positions, alpha)
    res = 1
    for pos in eachcol(positions)
        res *= exp(-alpha * sum(pos.^2))
    end
    return res
end

SimpleGparam (generic function with 1 method)

In [4]:
SimpleG(positions) = SimpleGparam(positions, wf.alpha)

SimpleG (generic function with 1 method)

In [5]:
dims = 3
num = 10
positions = (Random.rand(rng, Float64, (3, 10)) .- 0.5) .* 2

3×10 Matrix{Float64}:
 -0.940459  -0.486988  -0.537274  …  -0.0472455   0.936945     0.166076
  0.849075  -0.87162   -0.545691      0.549107    0.00169244  -0.494792
 -0.183359  -0.585328  -0.587043     -0.979136   -0.0282555   -0.468784

In [6]:
SimpleG(positions)

0.00339037433152797

## ParamDer

In [7]:
function paramDer(positions, wf::SimpleGaussian)::Float64
    temp_vec = [0.0, 0.0, 0.0]
    for pos in eachcol(positions)
        temp_vec .+= pos.^2
    end
    temp_vec .= temp_vec .* wf.HOshape
    return -sum(temp_vec) / size(positions)[2]
end

paramDer (generic function with 1 method)

In [8]:
@time paramDer(positions, wf)

  0.084905 seconds (456.03 k allocations: 25.720 MiB, 99.97% compilation time)


-1.1373629882812466

### Forward

In [9]:
result = DiffResults.DiffResult(0.0, 0.0)

function paramDerAD(positions, wf, result)
    result = ForwardDiff.derivative!(result, alpha -> SimpleGparam(positions, alpha), wf.alpha)
    
    return DiffResults.derivative(result) / DiffResults.value(result) / size(positions)[2]
end

paramDerAD (generic function with 1 method)

In [10]:
@time paramDerAD(positions, wf, result)

  0.211629 seconds (526.89 k allocations: 32.003 MiB, 99.99% compilation time)


-1.1373629882812468

## Kinetic energy

In [41]:
function kineticAL(positions, wf::SimpleGaussian)::Float64
    temp_vec = [0.0, 0.0, 0.0] #maybe there is a faster way? why does this not allocate??
    for pos in eachcol(positions)
        temp_vec .+= pos.^2
    end
    return wf.alpha * (length(positions) - 2.0 * wf.alpha * sum(temp_vec))
end;

In [43]:
@time kineticAL(positions, wf)

  0.000005 seconds (2 allocations: 128 bytes)


9.313185058593767

### Forward

In [138]:
tmp = zeros(30,30);

In [158]:
function kineticFD(tmp, positions, wf::SimpleGaussian)
    ForwardDiff.hessian!(tmp, SimpleG, positions)
    return -0.5 * la.tr(tmp) / SimpleG(positions)
end;

In [192]:
@time kineticFD(tmp, positions, wf)

  0.000131 seconds (229 allocations: 353.375 KiB)


9.313185058593767

In [None]:
5.705033 seconds (15.35 M allocations: 1.024 GiB, 3.10% gc time)
0.000121 seconds (226 allocations: 353.625 KiB)

### Reverse

In [110]:
config = ReverseDiff.HessianConfig(positions)
tape = ReverseDiff.HessianTape(SimpleG, positions, config)
tape = ReverseDiff.compile(tape)
result = zeros(30, 30);

In [116]:
function kineticRD(result, tape, positions)
    result = ReverseDiff.hessian!(result, tape, positions)
    return -0.5 * la.tr(result) / SimpleG(positions)
end;

In [196]:
@time kineticRD(result, tape, positions)

  0.000382 seconds (15 allocations: 1.188 KiB)


9.313185058593767

In [None]:
compile tape 1.870571 seconds
compile kinetic func 0.526639 seconds

## Reverse Forward trick

In [20]:
const CACHE = Dict{DataType, Any}()

Dict{DataType, Any}()

In [212]:
CACHE

Dict{DataType, Any} with 1 entry:
  Dual{Tag{typeof(inner), Float64}, Float64,… => typename(CompiledTape)(SimpleG)

In [219]:
function inner(y, positions::Array{T}) where {T<:Real}
    if !haskey(CACHE, T)
        config = ReverseDiff.GradientConfig(positions)
        tape = ReverseDiff.compile(ReverseDiff.GradientTape(SimpleG, positions, config))
        CACHE[T] = tape
    end
    tape = CACHE[T]
    return ReverseDiff.gradient!(y, tape, positions)
end

function kineticMX(positions, inner, config, result, y)
    ForwardDiff.jacobian!(result, inner, y, positions, config)
    return -0.5 * la.tr(result) #/ SimpleG(positions)
end

kineticMX (generic function with 2 methods)

In [220]:
y = similar(positions)
config = ForwardDiff.JacobianConfig(inner, y, positions);
result = zeros(30, 30);

In [222]:
@time kineticMX(positions, inner, config, result, y)

  0.000046 seconds (28 allocations: 1.906 KiB)


0.03157518356742613

### Zygote

In [73]:
function kineticZg(positions, wf::SimpleGaussian) #maybe faster to put return type here?
    res = Zygote.diaghessian(SimpleG, positions)[1]
    return -0.5 * sum(res) / SimpleG(positions)
end;

In [76]:
@time tot = kineticZg(positions, wf)

  0.000542 seconds (3.40 k allocations: 271.562 KiB)


9.313185058593769

In [None]:
28.892248 seconds (86.97 M allocations: 5.314 GiB, 4.12% gc time, 99.99% compilation time)

### Sparse

In [None]:
import SparseArrays

In [None]:
const CACHE = Dict{DataType, Any}()

In [None]:
function inner(y2, positions::Array{T}) where {T<:Real}
    if !haskey(CACHE, T)
        tape = ReverseDiff.compile(ReverseDiff.GradientTape(positions -> SimpleG(positions, wf.alpha), positions))
        CACHE[T] = (tape, zeros(T, length(positions)))
    end
    tape, y = CACHE[T]
    return ReverseDiff.gradient!(y, tape, positions)
end

A = Float64.(la.I(30))
jac = Float64.(SparseArrays.sparse(A))
A_col = SparseDiffTools.matrix_colors(jac)

@time hessian = SparseDiffTools.forwarddiff_color_jacobian!(jac, inner, positions, colorvec = A_col)
#-0.5 * sum(hessian) / SimpleG(positions, wf.alpha)

## QF

In [27]:
function QF(positions, p1, wf::SimpleGaussian) #maybe faster to put return type here?
    temp_vec = -4 * wf.alpha .* positions[:, p1] .* wf.HOshape
    return temp_vec
end

QF (generic function with 1 method)

In [71]:
@time QF(positions, 1, wf)

  0.000005 seconds (2 allocations: 224 bytes)


3-element Vector{Float64}:
  1.8809178116466283
 -1.6981497503900425
  0.3667183658214679

### Reverse

In [67]:
config = ReverseDiff.GradientConfig(positions)
tape = ReverseDiff.GradientTape(SimpleG, positions, config)
tape = ReverseDiff.compile(tape)
result = zeros(30);

In [68]:
function QFRD(result, tape, positions, p1)
    result = ReverseDiff.gradient!(result, tape, positions)
    dims = size(positions)[1]
    pidx = (p1 - 1) * dims + 1
    return 2 .* result[pidx:pidx + dims - 1] / SimpleG(positions)
end;

In [70]:
@time QFRD(result, tape, positions, 1)

  0.000027 seconds (16 allocations: 1.484 KiB)


3-element Vector{Float64}:
  1.8809178116466283
 -1.6981497503900422
  0.36671836582146783