In [14]:
import LinearAlgebra as la

import Random
# No seed is passed, so the random draws made from `rng` differ from run to run.
rng = Random.MersenneTwister();

import ForwardDiff
import ReverseDiff
import Flux

In [2]:
# Draw a 5-element vector of random Float64 values from `rng`.
positions = rand(rng, 5)

5-element Vector{Float64}:
 0.9877026744437778
 0.49257101943418724
 0.875310305528372
 0.054442928809441016
 0.6926596144509987

In [8]:
# `x_train` is a second name for the same array as `positions` (no copy is made).
x_train = positions;

In [3]:
"""
    model(positions)

Return the sum of the fifth powers of the entries of `positions`, plus 2.
"""
function model(positions)
    # Element-wise fifth power first, then a single reduction over the result.
    fifth_powers = positions .^ 5
    return sum(fifth_powers) + 2
end

model (generic function with 1 method)

## Hessian

In [4]:
# Global store keyed by a DataType; `inner` keeps one compiled ReverseDiff tape
# per input element type in it.
const CACHE = Dict{DataType, Any}()

Dict{DataType, Any}()

In [5]:
"""
    inner(y, positions::Array{T}) where {T<:Real}

Write the gradient of `model` at `positions` into `y` and return the value of
`ReverseDiff.gradient!`.

A compiled ReverseDiff tape is built the first time each element type `T` is
seen, stored in `CACHE` under `T`, and reused on later calls.

NOTE(review): the cache key is the element type only. The tape is recorded from
the first `positions` seen for that type, so reusing it for an array of a
different shape looks unsafe -- confirm before calling with other sizes.
"""
function inner(y, positions::Array{T}) where {T<:Real}
    # `get!` runs the do-block (record + compile) only when `T` has no entry yet.
    compiled = get!(CACHE, T) do
        cfg = ReverseDiff.GradientConfig(positions)
        recorded = ReverseDiff.GradientTape(model, positions, cfg)
        ReverseDiff.compile(recorded)
    end
    return ReverseDiff.gradient!(y, compiled, positions)
end

"""
    kineticMX(positions, inner, config, result, y)

Fill `result` with the Jacobian of the in-place function `inner` at `positions`
and return `result`.

# Arguments
- `positions`: point at which the Jacobian is evaluated.
- `inner`: function called as `inner(y, x)` that writes its output into `y`.
- `config`: the `ForwardDiff.JacobianConfig` passed through to `jacobian!`.
- `result`: preallocated output array; it is overwritten.
- `y`: preallocated output buffer for `inner`; it is overwritten.
"""
function kineticMX(positions, inner, config, result, y)
    ForwardDiff.jacobian!(
        result,
        inner,
        y,
        positions,
        config,
    )
    # A division by `SimpleG(positions)` was sketched here at some point but is
    # not applied; the raw Jacobian is returned.
    return result
end

kineticMX (generic function with 1 method)

In [9]:
# Work buffers for the Jacobian-of-gradient (Hessian) computation.
# `y` receives the gradient written by `inner`; `result` receives the Jacobian.
# The result matrix is sized from the buffers instead of a hard-coded 5x5 so it
# stays correct if the length of `x_train` changes.
y = similar(x_train)
config = ForwardDiff.JacobianConfig(inner, y, x_train);
result = zeros(eltype(x_train), length(y), length(x_train));

In [12]:
# Time one evaluation; `result` is overwritten in place and is also the returned value.
@time kineticMX(x_train, inner, config, result, y)

  0.000022 seconds (1 allocation: 224 bytes)


5×5 Matrix{Float64}:
 19.2712  0.0       0.0     0.0         0.0
  0.0     2.39021   0.0     0.0         0.0
  0.0     0.0      13.4127  0.0         0.0
  0.0     0.0       0.0     0.00322741  0.0
  0.0     0.0       0.0     0.0         6.64645

## Flux model

In [15]:
# Three Dense layers (5 -> 5 -> 3 -> 1) with no activation function passed,
# followed by an anonymous function that exponentiates the output element-wise
# and takes its first entry.
model_flux = Flux.Chain(Flux.Dense(5,5),
               Flux.Dense(5,3),
               Flux.Dense(3,1),
               x -> exp.(x)[1])

Chain(
  Dense(5, 5),                          # 30 parameters
  Dense(5, 3),                          # 18 parameters
  Dense(3, 1),                          # 4 parameters
  var"#3#4"(),
)                   # Total: 6 arrays, 52 parameters, 592 bytes.

In [20]:
# Time a single forward pass of the Flux chain on `positions`.
@time model_flux(positions)

  0.000017 seconds (8 allocations: 560 bytes)


1.8392177604609379

In [21]:
# Time a single evaluation of the hand-written `model` on the same input.
@time model(positions)

  0.000010 seconds (2 allocations: 112 bytes)


3.642263833433814

## Gradient of the Flux model

In [22]:
# Collect the parameter arrays of `model_flux`.
ps = Flux.params(model_flux);

In [23]:
"""
    grad_flux(model, positions)

Return the gradients of `model(positions)` with respect to the parameters of
`model`, as produced by `Flux.gradient` over `Flux.params(model)`.

The parameters are collected from the `model` argument itself rather than from
a global, so the result always matches the model that was passed in.
"""
function grad_flux(model, positions)
    trainable = Flux.params(model)
    return Flux.gradient(() -> model(positions), trainable)
end

grad_flux (generic function with 1 method)

In [25]:
# Time one gradient evaluation and keep the returned gradients in `grad_flux_res`.
@time grad_flux_res = grad_flux(model_flux, positions)

  0.000239 seconds (316 allocations: 21.594 KiB)


Grads(...)

In [None]:
# Look up the gradient entry for the `W` field of the first layer of the chain.
# NOTE(review): newer Flux releases appear to name this field `weight` -- confirm
# against the installed Flux version.
grad_flux_res[model_flux[1].W]