Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Surrogate Analysis #311

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Expand Down
7 changes: 6 additions & 1 deletion src/DataDrivenDiffEq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using Distributions
using QuadGK
using Statistics
using DataInterpolations

using ForwardDiff

using Requires
using ProgressMeter
Expand Down Expand Up @@ -41,6 +41,9 @@ abstract type AbstractKoopmanAlgorithm end
# Abstract symbolic_regression
abstract type AbstractSymbolicRegression end

# Abstract Surrogate
abstract type AbstractSurrogate end

# Problem and solution
abstract type AbstractDataDrivenProblem{dType, cType, probType} end
abstract type AbstractDataDrivenSolution end
Expand Down Expand Up @@ -125,7 +128,9 @@ export output, metrics, error, aic, determination

include("./solve/sindy.jl")
include("./solve/koopman.jl")
include("./solve/surrogates.jl")
export solve
export SurrogateSolvers

# Optional
function __init__()
Expand Down
2 changes: 1 addition & 1 deletion src/solution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ function DataDrivenSolution(prob::AbstractDataDrivenProblem, Ξ::AbstractMatrix,
if all(iszero.(Ξ))
@warn "Sparse regression failed! All coefficients are zero."
return DataDrivenSolution(
b, [], :failed, opt, Ξ, prob)
true, b, [], :failed, opt, Ξ, prob)
end

# Assert continuity
Expand Down
227 changes: 227 additions & 0 deletions src/solve/surrogates.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
"""
    entangle(f, idx)

Return a closure that evaluates `f(x)` and extracts its `idx`-th component.
Used to split a vector-valued function into scalar-valued surrogates.
"""
function entangle(f::Function, idx::Int)
    component(x) = f(x)[idx]
    return component
end

"""
    dependencies(∇, x, abstol = eps(), reltol = eps(); kwargs...)

Estimate which input components the function depends on, given its gradient
`∇` and samples `x` (one sample per column). Returns a `BitVector` mask.

A component counts as a dependency when the mean of the Inf-normalized
absolute gradients over all samples reaches `reltol`.
NOTE(review): `abstol` is accepted but currently unused — confirm intent.
"""
function dependencies(∇, x::AbstractMatrix{T}, abstol = eps(), reltol = eps(); kwargs...) where T <: Number
    scaled = [normalize(abs.(∇(col)), Inf) for col in eachcol(x)]
    return mean(scaled) .>= reltol
end

"""
    linearities(∇, x, abstol = eps(), reltol = eps(); kwargs...)

Estimate which input components the function depends on *linearly*: a
component whose gradient entry has (elementwise) variance below `reltol`
across all sample columns of `x` is considered linear. Returns a `BitVector`.
NOTE(review): `abstol` is accepted but currently unused — confirm intent.
"""
function linearities(∇, x::AbstractMatrix{T}, abstol = eps(), reltol = eps(); kwargs...) where T <: Number
    gradients = [∇(col) for col in eachcol(x)]
    return var(gradients) .< reltol
end

"""
    Surrogate(f, x::AbstractMatrix; abstol = eps(), reltol = eps(), kwargs...)

A surrogate wrapper around a scalar-valued function `f`, storing its
`ForwardDiff` gradient together with a mask of the input components the
function depends on (`deps`) and those it depends on linearly (`linears`).

If `f` is vector-valued (its output on the first column of `x` is an
`AbstractVector`), the constructor instead returns a `Vector` of scalar
surrogates, one per output component.

`op` and `children` stay undefined until `linear_split!` splits the
surrogate into a linear and a nonlinear part; check with `has_children`.
"""
mutable struct Surrogate <: AbstractSurrogate
    # Original function
    f::Function
    # Gradient
    ∇::Function

    # Dependent variables
    deps::BitVector
    # Linear variables
    linears::BitVector

    # Split
    op::Function
    children::AbstractVector{Surrogate}

    function Surrogate(f, x::AbstractMatrix; abstol = eps(), reltol = eps(), kwargs...)
        # Probe the output shape on the first sample.
        y_ = f(x[:,1])
        if isa(y_, AbstractVector)
            # Vector-valued f: recurse on each scalar component.
            # NOTE(review): this makes the constructor return a Vector, not a
            # Surrogate — callers must handle both cases.
            return map(1:size(y_, 1)) do i
                f_ = entangle(f, i)
                Surrogate(f_, x, abstol = abstol, reltol = reltol; kwargs...)
            end
        end

        ∇(x) = ForwardDiff.gradient(f, x)

        deps = dependencies(∇, x, abstol, reltol; kwargs...)
        linears = linearities(∇, x, abstol, reltol; kwargs...)
        # A variable is marked linear only if it is also a dependency;
        # `op` and `children` are deliberately left undefined here.
        return new(
            f, ∇, deps, linears .* deps
        )
    end
end

has_children(s::Surrogate) = isdefined(s, :children)


"""
$(TYPEDEF)

Defines a composition of solvers to be applied on the specific surrogate function.
'generator' should be defined in such a way that is generates basis elements for the (filtered)
dependent variables for explicit and - if used - implicit sparse regression, e.g.

```julia
generator(u) -> polynomial_basis(u,3)
generator(u,y)->vcat(generator(u), generator(u) .*y)

solvers = SurrogateSolvers(
generator, STLSQ(), ImplicitOptimizer(); kwargs...
)
```

# Fields

$(FIELDS)

# Note
This is still an experimental prototype and highly sensitive towards the tolerances.
"""
struct SurrogateSolvers{G, A, K}
generator::G
alg::A
kwargs::K

function SurrogateSolvers(generator::Function, args...; kwargs...)
return new{typeof(generator), typeof(args), typeof(kwargs)}(
generator, args, kwargs
)
end
end

"""
    linear_split!(s::Surrogate, x, abstol = eps(), reltol = eps(); kwargs...)

Split the surrogate `s` in place into a linear part `g` and a nonlinear
remainder `h = f - g`, stored as `s.children` combined with `s.op = +`.

No-op when there are no linear variables, when the function is fully linear
in its dependencies, or when a split was already performed. Note that the
tolerances are *positional* optional arguments; `kwargs...` is forwarded to
the child `Surrogate` constructors.
"""
function linear_split!(s::Surrogate, x::AbstractMatrix, abstol = eps(), reltol = eps(); kwargs...)
    sum(s.linears) < 1 && return
    s.linears == s.deps && return
    has_children(s) && return

    # Coefficients of the linear part: mean gradient over all samples,
    # restricted to the linearly-entering variables.
    w = mean(map(s.∇, eachcol(x)))[s.linears]

    # Capture the coefficients and the index mask by value so the closure
    # does not keep a reference to the mutable surrogate `s`.
    # (Fix: the original bound `idx` but still indexed with `s.linears`.)
    g = let w = w, idx = s.linears
        (x) -> dot(w, x[idx])
    end

    # Nonlinear remainder h = f - g.
    h = let f = s.f, g = g
        (x) -> f(x) - g(x)
    end

    setfield!(s, :op, +)
    setfield!(s, :children,
        [
            Surrogate(g, x, abstol = abstol, reltol = reltol; kwargs...);
            Surrogate(h, x, abstol = abstol, reltol = reltol; kwargs...)
        ]
    )
    return
end

"""
    surrogate_solve(s::Surrogate, x, sol::SurrogateSolvers; abstol = 1e-5, reltol = eps())

Recover a symbolic form of the surrogate `s` over the samples `x`. Split
surrogates are delegated to `composite_solve`, fully linear surrogates are
fit directly with an affine basis via `STLSQ`, and everything else goes
through `pareto_solve`.
"""
function surrogate_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; abstol = 1e-5, reltol = eps())
    if has_children(s)
        # Surrogate was split into linear + nonlinear parts.
        return composite_solve(s, x, sol, abstol = abstol, reltol = reltol)
    end

    targets = reduce(hcat, map(s.f, eachcol(x)))
    prob = DirectDataDrivenProblem(x, targets)

    # Purely linear in its dependencies: an affine basis suffices.
    if s.linears == s.deps
        u = [Symbolics.variable("u", i) for i in 1:size(x, 1)]
        affine = Basis([u[s.deps]; 1], u)
        return solve(prob, affine, STLSQ(abstol); sol.kwargs...)
    end

    return pareto_solve(s, x, sol, abstol = abstol, reltol = reltol)
end

"""
    pareto_solve(s::Surrogate, x, sol::SurrogateSolvers; kwargs...)

Run every algorithm in `sol.alg` on the surrogate `s` and return the valid
(`:solved`) result with the best AIC score.

Throws an `ErrorException` when no algorithm produced a valid solution
(the original code crashed with an opaque `argmax` error in that case).
"""
function pareto_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; kwargs...)
    res = map(sol.alg) do ai
        surrogate_solve(s, x, sol.generator, ai; kwargs..., sol.kwargs...)
    end
    # Keep only the results that actually converged.
    valid = [r for r in res if r.retcode == :solved]
    isempty(valid) && error("No solver in the composition returned a valid solution.")
    # NOTE(review): `argmax` picks the highest AIC — conventionally a lower
    # AIC is better; confirm the sign convention of `metrics(...)[:AIC]`.
    return valid[argmax(map(r -> metrics(r)[:AIC], valid))]
end

# Fallback: fit the surrogate output directly with the given algorithm(s),
# without constructing an explicit basis (e.g. for symbolic regression).
function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, args...; kwargs...)
    targets = reduce(hcat, [s.f(col) for col in eachcol(x)])
    return solve(DirectDataDrivenProblem(x, targets), args...; kwargs...)
end

# Explicit sparse regression: build a basis from the generator applied to
# the dependent variables only, then solve with the given optimizer.
function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Optimize.AbstractOptimizer; kwargs...)
    targets = reduce(hcat, [s.f(col) for col in eachcol(x)])
    prob = DirectDataDrivenProblem(x, targets)
    us = [Symbolics.variable("u", i) for i in 1:size(x, 1)]
    candidate = Basis(g(us[s.deps]), us)
    return solve(prob, candidate, opt; kwargs...)
end

# Implicit sparse regression: the basis additionally contains the implicit
# variable `y`, which the subspace optimizer solves for.
function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Optimize.AbstractSubspaceOptimizer; kwargs...)
    targets = reduce(hcat, [s.f(col) for col in eachcol(x)])
    prob = DirectDataDrivenProblem(x, targets)
    us = [Symbolics.variable("u", i) for i in 1:size(x, 1)]
    y = Symbolics.variable("y")
    candidate = Basis(g(us[s.deps], y), [us; y])
    return solve(prob, candidate, opt, [y]; kwargs...)
end

"""
    composite_solve(s::Surrogate, x, sol::SurrogateSolvers; kwargs...)

Solve each child of a split surrogate separately, then recombine the
resulting equations with `s.op` into a single-equation `Basis`, renumbering
all child parameters into one consecutive set `p₁ … pₙ`.
"""
function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; kwargs...)
    # Solve each child independently.
    res = map(s.children) do c
        surrogate_solve(c, x, sol; kwargs...)
    end

    # Get the results
    basis = map(x->result(x), res)
    # First (and only) equation of each child basis.
    equations = map(x->first(x).rhs, ModelingToolkit.equations.(basis))
    # Fresh parameter variables covering all children, numbered consecutively.
    pls = sum(length, parameters.(basis))
    p = [Symbolics.variable("p", i) for i in 1:pls]
    # Numerical parameter values, concatenated in child order to match `p`.
    ps = reduce(vcat, map(parameters, res))
    eq = Num(0)
    cnt = 1
    # Substitute each child's parameters with the renumbered ones and fold
    # the equations together with the split operator (currently always `+`;
    # the initial `Num(0)` is the identity for it).
    for (i,ei) in enumerate(equations)
        subs = Dict()
        for p_ in parameters(basis[i])
            push!(subs, p_ => p[cnt])
            cnt += 1
        end
        eq = s.op(Num(eq), substitute(Num(ei), subs))
    end

    b_new = Basis([eq], Num.(states(first(basis))), parameters = p)

    prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x))))

    return DataDrivenSolution(
        b_new, ps, :solved, map(x->x.alg, res), s.children, prob, true, eval_expression = true
    )
end



"""
    merge_results(r, prob; eval_expression = true, kwargs...)

Combine the individual surrogate solutions in `r` into a single
`DataDrivenSolution` over `prob`, renumbering the parameters of all
sub-results into one consecutive set `p₁ … pₙ`.
"""
function merge_results(r, prob::DataDrivenDiffEq.AbstractDataDrivenProblem, args...; eval_expression = true, kwargs...)
    results = result.(r)
    # One fresh parameter variable per parameter across all sub-results.
    n_ps = sum(map(length ∘ parameters, results))
    ps = [Symbolics.variable("p", i) for i in 1:n_ps]
    # Numerical parameter values, concatenated in the same order.
    pvals = reduce(vcat, map(parameters, r))

    # Substitute each sub-result's parameters with consecutively numbered ones.
    counter = 1
    eqs = map(r) do ri
        subs = Dict()
        for old_p in parameters(result(ri))
            push!(subs, old_p => ps[counter])
            counter += 1
        end
        map(equations(result(ri))) do eqi
            substitute(Num(eqi.rhs), subs)
        end
    end

    b_new = Basis(reduce(vcat, eqs), Num.(states(first(results))), parameters = ps)
    return DataDrivenSolution(
        b_new, pvals, :solved, map(x -> x.alg, r), r, prob, true, eval_expression = eval_expression
    )
end

"""
    solve(prob::DataDrivenProblem, f::Function, sol::SurrogateSolvers; abstol = eps(), reltol = eps(), kwargs...)

Perform a surrogate analysis of `f` over the data of `prob`: build scalar
surrogates, split off linear parts, solve each with the composed solvers in
`sol`, and merge everything into a single `DataDrivenSolution`.
"""
function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrogateSolvers; abstol = eps(), reltol = eps(), kwargs...)
    x, _ = get_oop_args(prob)
    s = Surrogate(f, x, abstol = abstol, reltol = reltol)
    # A scalar-valued `f` yields a single Surrogate (not iterable); normalize
    # to a vector so the mapping below works for one output as well.
    surrogates = isa(s, AbstractVector) ? s : [s]
    res = map(surrogates) do si
        # `linear_split!` takes the tolerances as optional *positional*
        # arguments; passing them as keywords (as before) silently shunted
        # them into `kwargs...` and the defaults `eps()` were used instead.
        linear_split!(si, x, abstol, reltol)
        surrogate_solve(si, x, sol, abstol = abstol, reltol = reltol)
    end

    ## Merge the per-output results into one solution
    merge_results(res, prob; kwargs...)
end
10 changes: 7 additions & 3 deletions src/symbolic_regression/symbolic_regression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function DiffEqBase.solve(prob::AbstractDataDrivenProblem, alg::EQSearch;
numprocs = nothing, procs = nothing,
multithreading = false,
runtests::Bool = true,
eval_expression = false
eval_expression = false, kwargs...
)

opt = to_options(alg)
Expand All @@ -61,8 +61,12 @@ function DiffEqBase.solve(prob::AbstractDataDrivenProblem, alg::EQSearch;
numprocs = numprocs, procs = procs, multithreading = multithreading,
runtests = runtests)
# Sort the paretofront
doms = map(1:size(Y, 1)) do i
calculateParetoFrontier(X, Y[i, :], hof[i], opt)
if isa(hof, AbstractVector)
doms = map(1:size(Y, 1)) do i
calculateParetoFrontier(X, Y[i, :], hof[i], opt)
end
else
doms = [calculateParetoFrontier(X, Y[1,:], hof, opt)]
end

build_solution(prob, alg, doms; eval_expression = eval_expression)
Expand Down
34 changes: 34 additions & 0 deletions test/surrogates/surrogate.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Manual dependencies when running this file standalone (normally provided
# by the surrounding test harness):
#using Revise
#
#using DataDrivenDiffEq
#using ModelingToolkit
#using Random

# Init the problem: three outputs mixing a trigonometric, an affine and a
# rational term in a few of the 20 state variables.
Random.seed!(1212)
ws = rand(4) .+ 0.2
f(x) = [-2.0sin(first(x))+5.0last(x); sum(ws[1:3] .* x[1:3]) + ws[end]; 3.0-2.0*x[6]/(1.0+3.0*x[5]^2) + x[3]]
# 20 states, 200 samples; only a handful of states actually enter f.
x = randn(20, 200)
y = reduce(hcat, map(f, eachcol(x)))
# Add small measurement noise to the targets.
ȳ = y .+ 1e-3*randn(size(y))

# Add a generator for the explicit candidate basis.
generator(u) = vcat(polynomial_basis(u, 2), sin.(u))

# Implicit variant: augment the explicit basis with terms multiplied by the
# implicit variable `v`.
generator(u,v) = begin
    explicits = polynomial_basis(u, 2)
    implicits = explicits .* v
    return vcat(explicits, implicits)
end

# Add the solver and the problem
sol = SurrogateSolvers(generator, STLSQ(1e-2:1e-2:1.0), ImplicitOptimizer(0.1:0.1:1.0), maxiters = 1000, normalize_coefficients = false, progress = true)
prob = DirectDataDrivenProblem(x, ȳ)
res = solve(prob, f, sol, abstol = 3e-2, reltol = 3e-2)

# Tests here
# NOTE(review): these only print — no assertions yet; add @test checks
# against the known structure of f before merging.
for r in res
    println(result(r))
    println(metrics(r))
    println(parameters(r))
end