Just testing...

In [None]:

# # input/output dimension of system
# const ndim = 4

# # width of hidden layers
# const ld = 4

# # learning rate
# const η = .001

# # number of training runs
# const runs = 1000

# #evaluate neural network
# function network(x, model)
# 	# input layer
# 	layer0 = tanh.(model[1].W * x)

# 	# first hidden layer
# 	layer1 = tanh.(model[2].W * layer0 .+ model[2].b)
	
# 	# second hidden layer
# 	layer2 = tanh.(model[3].W * layer1 .+ model[3].b)

# 	# output layer (linear activation)
# 	model[4].W * layer2
# end

# model = (
# 	(W = randn(ld, ndim), ),
# 	(W = randn(ld, ld), b = randn(ld)),
# 	(W = randn(ld, ld), b = randn(ld)),
# 	(W = randn(ndim, ld), ),
# )

In [None]:
# x = [1, 2, 3, 4]
# network(x, model)

Code from Sparsification module

In [1]:
using Distributions
using GeometricIntegrators
using Optim
using Random
using Distances
using Symbolics
using RuntimeGeneratedFunctions
RuntimeGeneratedFunctions.init(@__MODULE__)

"""
    _prod(a, rest...)

Multiply all arguments together with broadcasting, folding from the right:
`_prod(a, b, c)` evaluates `a .* (b .* c)`. A single argument is returned unchanged.
"""
_prod(a) = a
function _prod(a, rest...)
    return a .* _prod(rest...)
end


"""
    get_z_vector(dims)

Create the symbolic state vector `z = [q; p]` from two symbolic array variables
`q` and `p`, each indexed over `1:dims`.
"""
function get_z_vector(dims)
    @variables q[1:dims] p[1:dims]
    return vcat(q, p)
end

"""
    poly_combos(z, order, inds...)

Return the monomials in the entries of `z` whose total degree is exactly `order`
(e.g. `order = 2` yields only products of two entries).

`inds` holds the indices chosen so far by the recursion; callers normally omit it.
Each recursion level only appends indices that are not smaller than the last chosen
one, so every monomial is produced once. `order == 0` yields the constant `Num[1]`.
"""
function poly_combos(z, order, inds...)
    order == 0 && return Num[1]

    if order == length(inds)
        # enough factors collected: multiply them into a single monomial
        factors = [z[i] for i in inds]
        return [_prod(factors...)]
    end

    # continue from the last chosen index so that permutations are not repeated
    first_j = isempty(inds) ? 1 : last(inds)
    branches = [poly_combos(z, order, inds..., j) for j in first_j:length(z)]
    return vcat(branches...)
end

"""
    primal_monomial_basis(z, order::Int)

Collect the monomials of `z` of every total degree from 1 up to `order`
(the constant term is not included). `order < 1` gives an empty basis.
"""
function primal_monomial_basis(z, order::Int)
    per_degree = map(1:order) do degree
        poly_combos(z, degree)
    end
    return Vector{Symbolics.Num}(vcat(per_degree...))
end

"""
    primal_coeff_basis(z, max_coeff::Int)

Return the scaled states `k .* z` for `k = 1, …, max_coeff`, concatenated in that
order. Mostly meant as arguments for trigonometric terms such as `sin(k * z)`,
where `k` is the coefficient.
"""
function primal_coeff_basis(z, max_coeff::Int)
    scaled_copies = [k .* z for k in 1:max_coeff]
    flat = vcat(scaled_copies...)
    return Vector{Symbolics.Num}(flat)
end

"""
    primal_operator_basis(z, operator)

Apply the binary `operator` (e.g. `+`, `-`, `*`, `/`) to every pair of distinct
states, in both argument orders, and return the results as a vector.

The terms `operator(z[i], z[j])` with `i < j` come first, followed by the swapped
terms `operator(z[j], z[i])`; duplicates are dropped via `union`.
"""
function primal_operator_basis(z, operator)
    n = length(z)
    index_pairs = [(i, j) for i in 1:n-1 for j in i+1:n]

    forward = [operator(z[i], z[j]) for (i, j) in index_pairs]
    swapped = [operator(z[j], z[i]) for (i, j) in index_pairs]

    return Vector{Symbolics.Num}(union(forward, swapped))
end

"""
    primal_power_basis(z, max_power::Int)

Return element-wise powers of the states as a new basis.

- `max_power > 0`: `z.^1, z.^2, …, z.^max_power`, concatenated in that order.
- `max_power < 0`: `z.^-1, z.^-2, …, z.^max_power`, concatenated in that order.
- `max_power == 0`: an empty `Vector{Symbolics.Num}`.

The zero case used to fall through and return `nothing`; it now returns an empty
vector so the result can always be passed to `vcat` / `get_basis_set`, like the
other basis builders.
"""
function primal_power_basis(z, max_power::Int)
    if max_power > 0
        exponents = 1:1:max_power
    elseif max_power < 0
        exponents = -1:-1:max_power
    else
        return Symbolics.Num[]
    end
    return Vector{Symbolics.Num}(vcat([z .^ e for e in exponents]...))
end

"""
    polynomial_basis(z = get_z_vector(2); polyorder = 0, operator = nothing, max_coeff = 0)

Assemble a basis from, in this order:

1. the monomials of `z` up to total degree `polyorder`,
2. the scaled states `k .* z` for `k = 1:max_coeff`,
3. if `operator` is given, the pairwise terms from `primal_operator_basis`.
"""
function polynomial_basis(z::Vector{Symbolics.Num} = get_z_vector(2); polyorder::Int = 0, operator=nothing, max_coeff::Int = 0)
    monomials = primal_monomial_basis(z, polyorder)
    scaled = primal_coeff_basis(z, max_coeff)

    isnothing(operator) && return vcat(monomials, scaled)

    return vcat(monomials, scaled, primal_operator_basis(z, operator))
end

"""
    trigonometric_basis(z = get_z_vector(2); polyorder = 0, operator = nothing, max_coeff = 0)

Build the terms of `polynomial_basis` with the same keywords and return the sine
of each term followed by the cosine of each term.
"""
function trigonometric_basis(z::Vector{Symbolics.Num} = get_z_vector(2); polyorder::Int = 0, operator=nothing, max_coeff::Int = 0)
    arguments = polynomial_basis(z; polyorder, operator, max_coeff)
    return [sin.(arguments); cos.(arguments)]
end

"""
    exponential_basis(z = get_z_vector(2); polyorder = 0, operator = nothing, max_coeff = 0)

Build the terms of `polynomial_basis` with the same keywords and return the
exponential of each term.
"""
function exponential_basis(z::Vector{Symbolics.Num} = get_z_vector(2); polyorder::Int = 0, operator=nothing, max_coeff::Int = 0)
    arguments = polynomial_basis(z; polyorder, operator, max_coeff)
    return @. exp(arguments)
end

"""
    logarithmic_basis(z = get_z_vector(2); polyorder = 0, operator = nothing, max_coeff = 0)

Build the terms of `polynomial_basis` with the same keywords and return
`log(abs(term))` for each term.
"""
function logarithmic_basis(z::Vector{Symbolics.Num} = get_z_vector(2); polyorder::Int = 0, operator=nothing, max_coeff::Int = 0)
    arguments = polynomial_basis(z; polyorder, operator, max_coeff)
    return @. log(abs(arguments))
end

"""
    mixed_states_basis(basis::Vector{Symbolics.Num}...)

Return all cross products between terms of *different* bases: for every pair of
bases `(i, j)` with `i < j`, each term of basis `i` is multiplied with each term of
basis `j`. Terms of one basis are never multiplied with each other.
"""
function mixed_states_basis(basis::Vector{Symbolics.Num}...)
    products = Symbolics.Num[]
    nsets = length(basis)

    for i in 1:nsets, j in (i + 1):nsets
        # the term of basis i varies slowest, the term of basis j fastest
        for left in basis[i], right in basis[j]
            push!(products, left * right)
        end
    end

    return products
end

"""
    get_numCoeffs(basis::Vector{Symbolics.Num})

Number of coefficients needed for `basis`: one per basis term.
"""
get_numCoeffs(basis::Vector{Symbolics.Num}) = length(basis)

"""
    get_basis_set(bases::Vector{Symbolics.Num}...)

Concatenate the given bases into a single basis and drop duplicate terms,
keeping the first occurrence of each.
"""
function get_basis_set(bases::Vector{Symbolics.Num}...)
    combined = vcat(bases...)
    return Vector{Symbolics.Num}(unique(combined))
end

"""
    ΔH_func_builder(d::Int, z = get_z_vector(d), bases::Vector{Symbolics.Num}...)

Build a native Julia function that evaluates the Hamiltonian vector field of the
parametrised Hamiltonian `H(z; a) = Σᵢ a[i] * basis[i]`.

# Arguments
- `d`: number of `q` (equivalently `p`) coordinates; the first `2d` entries of the
  symbolic gradient are used.
- `z`: symbolic state vector, ordered as `[q; p]`.
- `bases`: one or more bases; they are merged and de-duplicated by `get_basis_set`.

# Returns
The in-place function generated by `build_function`, called as `f!(out, z, a)`. It
writes `[∂H/∂p; -∂H/∂q]` into `out`, given state values `z` and coefficients `a`
(one coefficient per term of the merged basis).
"""
function ΔH_func_builder(d::Int, z::Vector{Symbolics.Num} = get_z_vector(d), bases::Vector{Symbolics.Num}...)
    # one differential operator per state variable
    Dz = Differential.(z)

    # merge the supplied bases into one de-duplicated basis
    basis = get_basis_set(bases...)

    # one symbolic coefficient per basis term
    @variables a[1:get_numCoeffs(basis)]

    # parametrised Hamiltonian: sum of coefficient * basis term
    ham = sum(collect(a .* basis))

    # plain gradient of the Hamiltonian w.r.t. z (not yet the Hamiltonian vector field)
    f = [expand_derivatives(dz(ham)) for dz in Dz]

    # apply the skew-symmetric structure: (q̇, ṗ) = (∂H/∂p, -∂H/∂q)
    ∇H = vcat(f[d+1:2d], -f[1:d])

    # build_function returns a pair of expressions; [2] is the in-place variant,
    # which is turned into a callable native Julia function here
    ∇H_eval = @RuntimeGeneratedFunction(Symbolics.inject_registered_module_functions(build_function(∇H, z, a)[2]))

    return ∇H_eval
end

"""
    HamiltonianSINDy(basis, analytical_fθ = missing, z = get_z_vector(2);
                     λ = 0.05, noise_level = 0.01, noiseGen_timeStep = 0.05, nloops = 10)

Settings for the Hamiltonian SINDy method.

`λ`, `noise_level` and `noiseGen_timeStep` share the type parameter `T`, so they
must all be passed with the same type. `analytical_fθ` is either a callable or
`missing`.
"""
struct HamiltonianSINDy{T, GHT}
    # augmented basis used for sparsification
    basis::Vector{Symbolics.Num}
    # analytical vector field (or `missing`); called by `gen_noisy_ref_data`
    analytical_fθ::GHT
    # symbolic state vector
    z::Vector{Symbolics.Num}
    # sparsification parameter
    λ::T
    # amplitude of the noise added to the data
    noise_level::T
    # time step of the integrator that produces the noisy data
    noiseGen_timeStep::T
    # number of sparsification loops
    nloops::Int

    function HamiltonianSINDy(
        basis::Vector{Symbolics.Num},
        analytical_fθ::GHT = missing,
        z::Vector{Symbolics.Num} = get_z_vector(2);
        λ::T = 0.05,
        noise_level::T = 0.01,
        noiseGen_timeStep::T = 0.05,
        nloops = 10,
    ) where {T, GHT <: Union{Base.Callable,Missing}}
        return new{T, GHT}(
            basis, analytical_fθ, z, λ, noise_level, noiseGen_timeStep, nloops
        )
    end
end

"""
    gen_noisy_ref_data(method::HamiltonianSINDy, x)

For every state in `x`, integrate `method.analytical_fθ` over one step of length
`method.noiseGen_timeStep` with `Gauss(2)`, then add Gaussian noise scaled by
`method.noise_level` to each propagated state.

Returns the collection of noisy states at the next time step. All states are
propagated first; the noise is drawn afterwards.
"""
function gen_noisy_ref_data(method::HamiltonianSINDy, x)
    h = method.noiseGen_timeStep
    window = (zero(h), h)

    # adapter: the problem is given a (dx, t, x, params) function, whereas
    # analytical_fθ is called as (dx, x, params, t)
    rhs = (dx, t, x, params) -> method.analytical_fθ(dx, x, params, t)

    # propagate a single state by one time step and return the final state
    function advance(x₀)
        problem = ODEProblem(rhs, window, h, x₀)
        return integrate(problem, Gauss(2)).q[end]
    end

    propagated = [advance(x₀) for x₀ in x]

    return [state .+ method.noise_level .* randn(size(state)) for state in propagated]
end

"""
    TrainingData(x, ẋ, y)
    TrainingData(x, ẋ)

Container for training data; all fields share the same array type `AT`.
The two-argument form leaves `y` uninitialised.
"""
struct TrainingData{AT<:AbstractArray}
    # initial conditions
    x::AT
    # time derivatives paired with `x`
    ẋ::AT
    # noisy data at the next time step
    y::AT

    function TrainingData(x::AT, ẋ::AT, y::AT) where {AT}
        return new{AT}(x, ẋ, y)
    end

    function TrainingData(x::AT, ẋ::AT) where {AT}
        return new{AT}(x, ẋ)
    end
end


In [3]:
"""
    HamiltonianSINDyVectorField(coefficients, fθ)

A vector-field function `fθ` paired with its coefficient vector. `DT` is the
element type of `coefficients`.
"""
struct HamiltonianSINDyVectorField{DT,CT,GHT}
    # coefficient vector passed to `fθ` as its last argument
    coefficients::CT
    # callable evaluated as fθ(dz, z, coefficients)
    fθ::GHT

    function HamiltonianSINDyVectorField(
        coefficients::CT, fθ::GHT
    ) where {DT, CT <: AbstractVector{DT}, GHT <: Base.Callable}
        return new{DT,CT,GHT}(coefficients, fθ)
    end
end


"""
    VectorField(method::HamiltonianSINDy, data::TrainingData; solver = Newton())

Identify a Hamiltonian vector field from `data`: build the symbolic gradient
function for `method.basis`, run the sparse regression on `data.x` / `data.y`, and
wrap the coefficients and gradient function in a `HamiltonianSINDyVectorField`.
"""
function VectorField(method::HamiltonianSINDy, data::TrainingData; solver = Newton())
    # TODO: Check that first dimension x is even

    # number of q (equivalently p) coordinates: half the state dimension
    halfdim = size(data.x[begin], 1) ÷ 2

    # symbolic construction of the Hamiltonian gradient function
    gradient! = ΔH_func_builder(halfdim, method.z, method.basis)

    # TODO: make sparsify method choosable through arguments
    # (alternatives referenced so far: sparsify_two, sparsify on data.ẋ,
    #  sparsify_parallel_encoder)
    sparse_coeffs = sparsify_parallel(method, gradient!, data.x, data.y, solver)

    return HamiltonianSINDyVectorField(sparse_coeffs, gradient!)
end


" wrapper function for generalized SINDY hamiltonian gradient.
Needs the output of fθ to work! "
function (vectorfield::HamiltonianSINDyVectorField)(dz, z)
    # evaluate the stored gradient function in place with the stored coefficients
    gradient! = vectorfield.fθ
    gradient!(dz, z, vectorfield.coefficients)
    return dz
end

# Four-argument form (dz, z, p, t): `p` and `t` are accepted but ignored.
function (vectorfield::HamiltonianSINDyVectorField)(dz, z, p, t)
    return vectorfield(dz, z)
end


In [4]:

# --------------------
# Setup
# --------------------

println("Setting up...")

# 2D system with 4 variables [q₁, q₂, p₁, p₂]
nd = 4

# q and p each take half of the state; use integer division so that an Int
# (not the Float64 produced by `nd/2`) is passed as the variable count
z = get_z_vector(nd ÷ 2)

# basis: monomials up to degree 3, sin/cos of the states, pairwise differences
polynomial = polynomial_basis(z, polyorder=3)
trigonometric  = trigonometric_basis(z, max_coeff=1)
prime_diff = primal_operator_basis(z, -)
basis = get_basis_set(polynomial, trigonometric, prime_diff)
# initialize analytical function, keep λ smaller than ϵ so system is identifiable
ϵ = 0.5
m = 1

# two-dim simple harmonic oscillator (not used anywhere only in case some testing needed)
# H_ana(x, p, t) = ϵ * x[1]^2 + ϵ * x[2]^2 + 1/(2*m) * x[3]^2 + 1/(2*m) * x[4]^2
# H_ana(x, p, t) = cos(x[1]) + cos(x[2]) + 1/(2*m) * x[3]^2 + 1/(2*m) * x[4]^2

# Analytical vector field of the 2D system for a state x = [q₁, q₂, p₁, p₂]:
# returns [p₁, p₂, sin(q₁), sin(q₂)].
# (harmonic-oscillator alternative: [x[3]; x[4]; -2ϵ * x[1]; -2ϵ * x[2]])
function grad_H_ana(x)
    return [x[3], x[4], sin(x[1]), sin(x[2])]
end

# In-place variant with the (dx, x, p, t) signature; `p` and `t` are unused.
# Writes the vector field into `dx` and returns `dx`.
function grad_H_ana!(dx, x, p, t)
    dx .= grad_H_ana(x)
    return dx
end
# ------------------------------------------------------------
# Training Data
# ------------------------------------------------------------

println("Generate Training Data...")

# number of sample values along each coordinate axis
num_samp = 10

# equally spaced sample values, shared by all q and p coordinates
samp_range = LinRange(-20, 20, num_samp)

# full tensor grid over all nd coordinates (samp_range × … × samp_range, nd times),
# i.e. num_samp^nd grid points stored as tuples
s = collect(Iterators.product(ntuple(_ -> samp_range, nd)...))

# flatten the grid into a list of state vectors
x = [collect(point) for point in vec(s)]

# evaluate the analytical vector field at every state;
# each evaluation writes into its own copy of the zero buffer dx
dx = zeros(nd)
p = 0
t = 0
ẋ = map(_x -> grad_H_ana!(copy(dx), _x, p, t), x)


# ----------------------------------------
# Compute Sparse Regression
# ----------------------------------------

# SINDy method settings.
# λ must be close to the noise value so that only coefficients of about the size
# of the noise are sparsified away; noiseGen_timeStep is chosen arbitrarily for now.
method = HamiltonianSINDy(
    basis,
    grad_H_ana!,
    z;
    λ = 0.05,
    noise_level = 0.00,
    noiseGen_timeStep = 0.05,
)

# reference data at the next time step (with noise of amplitude noise_level)
y = gen_noisy_ref_data(method, x)

# bundle states, derivatives and next-step data
tdata = TrainingData(x, ẋ, y)

Setting up...
Generate Training Data...


TrainingData{Vector{Vector{Float64}}}([[-20.0, -20.0, -20.0, -20.0], [-15.555555555555557, -20.0, -20.0, -20.0], [-11.11111111111111, -20.0, -20.0, -20.0], [-6.66666666666667, -20.0, -20.0, -20.0], [-2.2222222222222214, -20.0, -20.0, -20.0], [2.2222222222222214, -20.0, -20.0, -20.0], [6.666666666666664, -20.0, -20.0, -20.0], [11.11111111111111, -20.0, -20.0, -20.0], [15.555555555555555, -20.0, -20.0, -20.0], [20.0, -20.0, -20.0, -20.0]  …  [-20.0, 20.0, 20.0, 20.0], [-15.555555555555557, 20.0, 20.0, 20.0], [-11.11111111111111, 20.0, 20.0, 20.0], [-6.66666666666667, 20.0, 20.0, 20.0], [-2.2222222222222214, 20.0, 20.0, 20.0], [2.2222222222222214, 20.0, 20.0, 20.0], [6.666666666666664, 20.0, 20.0, 20.0], [11.11111111111111, 20.0, 20.0, 20.0], [15.555555555555555, 20.0, 20.0, 20.0], [20.0, 20.0, 20.0, 20.0]], [[-20.0, -20.0, -0.9129452507276277, -0.9129452507276277], [-20.0, -20.0, -0.1518183733999112, -0.9129452507276277], [-20.0, -20.0, 0.9933330424549106, -0.9129452507276277], [-20.0, -

In [5]:
# number of q (equivalently p) coordinates: half the length of a training state
d = div(size(first(tdata.x), 1), 2)

# generated in-place function for the gradient of the parametrised Hamiltonian
fθ = ΔH_func_builder(d, method.z, method.basis)

RuntimeGeneratedFunction(#=in Main=#, #=using Main=#, :((ˍ₋out, ˍ₋arg1, a)->begin
          #= C:\Users\nigel\.julia\packages\SymbolicUtils\H684H\src\code.jl:350 =#
          #= C:\Users\nigel\.julia\packages\SymbolicUtils\H684H\src\code.jl:351 =#
          #= C:\Users\nigel\.julia\packages\SymbolicUtils\H684H\src\code.jl:352 =#
          begin
              begin
                  #= C:\Users\nigel\.julia\packages\Symbolics\3jLt1\src\build_function.jl:520 =#
                  #= C:\Users\nigel\.julia\packages\SymbolicUtils\H684H\src\code.jl:399 =# @inbounds begin
                          #= C:\Users\nigel\.julia\packages\SymbolicUtils\H684H\src\code.jl:395 =#
                          ˍ₋out[1] = (+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((+)((*)((cos)(ˍ₋arg1[3]), (getindex)(a, 37)), (*)(-1, (getindex)(a, 44))), (*)(-1, (getindex)(a, 46))), (*)(-1, (getindex)(a, 54))), (*)((^)(ˍ₋arg1[4], 2), (getindex)(a, 33))), (*)((^)(ˍ₋arg1[1], 2), (getindex

In [19]:
# width of the latent (hidden) layers
ld = 4
# dimension of a training state
ndim = size(first(tdata.x), 1)

# network parameters, one NamedTuple per layer:
#   1. input layer weights (no bias)
#   2. hidden layer weights and bias
#   3. SINDy coefficients, one per basis term, initialised to zero
#   4. output layer weights (no bias)
model = (
    (; W = randn(ld, ndim)),
    (; W = randn(ld, ld), b = randn(ld)),
    (; W = zeros(get_numCoeffs(method.basis))),
    (; W = randn(ndim, ld)),
)

((W = [-0.4940019128346206 0.09752468328156615 1.331578016230588 -0.20292327002091207; 1.0508045233474548 0.562582399634259 0.7766634643103753 0.32896203956253534; -0.5557696326381378 -1.2481492543397241 -1.094763897321301 -1.1971504151081356; -0.4758794012849007 0.3111896416850261 1.2150738574101774 -0.9713389451406967],), (W = [0.6228894676539797 -0.3523209295700162 0.03872303500267631 -2.0680877322938622; 0.6950920129222059 0.46192371922110653 0.8606743727326654 -0.19324242873678746; -1.523031345297429 1.7785063085186315 -0.1652217201543841 0.07972215585952311; 1.7254730509606304 -0.07890337274202382 1.85526847158796 -1.9142442957089896], b = [-0.6857378993056897, 0.20532056674221225, 1.686127974916049, 0.15750112532370059]), (W = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],), (W = [0.3472449093583463 -0.020971406276601153 -0.42272252135545524 -0.4635353215136673; -0.24806651582471337 -1.5716392671841364 0.5997844789041654 

In [20]:

"""
    network_two(xᵢₙ, model, method, fθ, solver; numLoops = 4)

Evaluate the network: two `tanh` layers encode `xᵢₙ` into a latent state `x₀`, a
SINDy layer advances `x₀` by one implicit-midpoint step of size
`method.noiseGen_timeStep` using the vector field `fθ` with coefficients
`model[3].W`, and a linear output layer maps the result back.

# Arguments
- `xᵢₙ`: input state.
- `model`: tuple of four layers; `model[1].W`, `model[2].W`, `model[2].b`,
  `model[3].W` (SINDy coefficients) and `model[4].W` are read.
- `method`: only `method.noiseGen_timeStep` is read.
- `fθ`: in-place vector field, called as `fθ(f, x, coeffs)`.
- `solver`: currently unused; kept so existing call sites keep working.

# Keywords
- `numLoops`: number of fixed-point iterations of the midpoint rule (default `4`,
  the value that used to be hard-coded).

NOTE(review): `fθ` is evaluated on the latent state `x₀`, so the latent width
presumably has to equal the state dimension `fθ` was generated for — confirm.
"""
function network_two(xᵢₙ, model, method, fθ, solver; numLoops::Integer = 4)
    # input layer
    input_layer = tanh.(model[1].W * xᵢₙ)

    # first hidden layer
    x₀ = tanh.(model[2].W * input_layer .+ model[2].b)

    # SINDy layer
    # coeffs initialized to a vector of zeros b/c easier to optimize zeros for our case
    coeffs = model[3].W
    Δt = method.noiseGen_timeStep

    # work buffers share the coefficient element type
    x̄ = zeros(eltype(coeffs), axes(x₀))
    x̃ = zeros(eltype(coeffs), axes(x₀))
    f = zeros(eltype(coeffs), axes(x₀))

    # gradient at the current state; explicit Euler serves as the first guess
    fθ(f, x₀, coeffs)
    x̃ .= x₀ .+ Δt .* f

    for _ in 1:numLoops
        # midpoint between the current state and the latest guess for the next one
        x̄ .= (x₀ .+ x̃) ./ 2
        fθ(f, x̄, coeffs)
        # midpoint rule for the integration to the next step
        x̃ .= x₀ .+ Δt .* f
    end

    # output layer (linear activation)
    return model[4].W * x̃
end

network_two (generic function with 1 method)

small test

In [21]:
# xᵢₙ = tdata.x[1]
# network_two(xᵢₙ, model, method, fθ, Newton())

back to code

In [22]:
"""
    loss_kernel(xᵢₙ, x₁, model, method, fθ, solver)

Squared Euclidean distance between the network prediction for `xᵢₙ` and the
target state `x₁`.
"""
function loss_kernel(xᵢₙ, x₁, model, method, fθ, solver)
    prediction = network_two(xᵢₙ, model, method, fθ, solver)
    return sqeuclidean(prediction, x₁)
end

loss_kernel (generic function with 1 method)

In [23]:
"""
    loss(coeffs::AbstractVector)

Total loss over the training set for the flattened parameter vector `coeffs`.

`coeffs` is split, in this order, into the input weights (`ld × ndim`), hidden
weights (`ld × ld`), hidden bias (`ld`), SINDy coefficients (one per basis term)
and output weights (`ndim × ld`). The loss is the sum of `loss_kernel` over all
pairs of `tdata.x` and `tdata.y`.

Reads the globals `ld`, `ndim`, `method`, `fθ`, `solver` and `tdata`.
"""
function loss(coeffs::AbstractVector)
    # running end offsets of the consecutive parameter segments
    stop_input = ld * ndim
    stop_hidden_W = stop_input + ld * ld
    stop_hidden_b = stop_hidden_W + ld
    stop_sindy = stop_hidden_b + get_numCoeffs(method.basis)

    # rebuild the layered model from the flat vector
    reconstructed_model = (
        (W = reshape(coeffs[1:stop_input], ld, ndim),),
        (
            W = reshape(coeffs[stop_input+1:stop_hidden_W], ld, ld),
            b = reshape(coeffs[stop_hidden_W+1:stop_hidden_b], ld),
        ),
        (W = coeffs[stop_hidden_b+1:stop_sindy],),
        (W = reshape(coeffs[stop_sindy+1:end], ndim, ld),),
    )

    return mapreduce(+, zip(tdata.x, tdata.y)) do (xᵢₙ, x₁)
        loss_kernel(xᵢₙ, x₁, reconstructed_model, method, fθ, solver)
    end
end

loss (generic function with 1 method)

In [24]:
using LinearAlgebra

# parameter arrays in the order in which `loss` splits the flat vector again
coeffs = [model[1].W, model[2].W, model[2].b, model[3].W, model[4].W]

# flatten every array column-major and join them into a single vector
flattened_model = vcat(map(vec, coeffs)...)


106-element Vector{Float64}:
 -0.4940019128346206
  1.0508045233474548
 -0.5557696326381378
 -0.4758794012849007
  0.09752468328156615
  0.562582399634259
 -1.2481492543397241
  0.3111896416850261
  1.331578016230588
  0.7766634643103753
  ⋮
 -1.1919998092070714
 -0.42272252135545524
  0.5997844789041654
  1.6573768396546567
  0.2132580698295217
 -0.4635353215136673
 -1.6714294452345042
 -1.576922255191032
  0.8797797275011311

In [25]:
# optimise all network parameters, starting from the flattened initial model
println("Initial Guess...")
# Define the optimization solver
solver = BFGS()
result = Optim.optimize(loss, flattened_model, solver, Optim.Options(show_trace=true); autodiff = :forward)

# `coeffs` so far holds the five separate parameter arrays, so broadcasting the
# flat minimizer into it (`coeffs .= …`) fails with a DimensionMismatch.
# Rebind it to the optimised flat parameter vector instead.
coeffs = Optim.minimizer(result)

println(result)

Initial Guess...


Iter     Function value   Gradient norm 


     0     6.123124e+06     1.814828e+05
 * time: 0.018000125885009766


     1     5.537703e+06     1.476226e+05
 * time: 24.60199999809265


     2     4.881667e+06     1.122941e+05
 * time: 50.93900012969971


     3     4.114988e+06     8.854054e+04
 * time: 74.24100017547607


     4     3.520571e+06     1.447283e+05
 * time: 100.65900015830994


     5     3.392431e+06     9.206898e+04
 * time: 123.90400004386902


     6     3.246096e+06     8.558427e+04
 * time: 150.86600017547607


     7     3.120481e+06     7.889838e+04
 * time: 180.3270001411438


     8     3.005576e+06     6.576877e+04
 * time: 208.43200016021729


     9     2.928176e+06     6.741656e+04
 * time: 234.9430000782013


    10     2.739542e+06     6.356774e+04
 * time: 265.50800013542175


    11     2.695393e+06     1.055378e+05
 * time: 295.35199999809265


    12     2.639809e+06     7.977781e+04
 * time: 328.4670000076294


    13     2.431054e+06     6.369389e+04
 * time: 360.728000164032


    14     2.419894e+06     7.030843e+04
 * time: 384.6340000629425
