Skip to content

Commit

Permalink
Merge pull request #45 from JuliaDiffEq/sampling
Browse files Browse the repository at this point in the history
Sampling Methods for Data
  • Loading branch information
AlCap23 committed Feb 11, 2020
2 parents de617a8 + 83703ea commit b4fa3f6
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 2 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Expand Up @@ -10,13 +10,15 @@ ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
Compat = "2.2, 3.0"
ModelingToolkit = "1.1.3"
ModelingToolkit = "1.2.5"
ProximalOperators = "0.10"
QuadGK = "2.3.1"
StatsBase = "0.32.0"
julia = "1"

[extras]
Expand Down
4 changes: 3 additions & 1 deletion src/DataDrivenDiffEq.jl
Expand Up @@ -2,7 +2,8 @@ module DataDrivenDiffEq

using LinearAlgebra
using ModelingToolkit
using QuadGK, Statistics
using QuadGK
using Statistics
using Compat

abstract type abstractBasis end;
Expand Down Expand Up @@ -46,5 +47,6 @@ export ISInDy
include("./utils.jl")
export AIC, AICC, BIC
export hankel, optimal_shrinkage, optimal_shrinkage!
export burst_sampling, subsample

end # module
71 changes: 71 additions & 0 deletions src/utils.jl
@@ -1,3 +1,5 @@
import StatsBase: sample

# Model selection

# Taken from https://royalsocietypublishing.org/doi/pdf/10.1098/rspa.2017.0009
Expand Down Expand Up @@ -100,3 +102,72 @@ function optimal_shrinkage!(X::AbstractArray{T, 2}) where T <: Number
X .= U*Diagonal(S)*V'
return
end


"""
    burst_sampling(x, samplesize, bursts)

Draw `bursts` randomly placed windows of consecutive samples from `x` and return
the selected entries via `resample` (i.e. along the last dimension of `x`).

Window start indices are drawn without replacement from
`1:size(x)[end]-samplesize`; every window covers the indices
`i:i+samplesize`, i.e. `samplesize + 1` samples. Overlapping windows are merged,
so the returned data follows the sorted, unique set of selected indices.

# Arguments
- `x`: data array; samples are indexed by its last dimension.
- `samplesize`: offset between the first and last index of a window.
- `bursts`: number of windows to draw.
"""
@inline function burst_sampling(x::AbstractArray, samplesize::Int64, bursts::Int64)
    n = size(x)[end]
    @assert n >= samplesize*bursts "Length of data array too small for subsampling of size $samplesize!"
    starts = sample(1:n-samplesize, bursts, replace = false)
    # Accumulate the window indices in a typed buffer instead of splatting a
    # vector of vectors into `vcat`.
    inds = Int64[]
    for i in starts
        append!(inds, i:i+samplesize)
    end
    sort!(unique!(inds))
    return resample(x, inds)
end


"""
    burst_sampling(x, y, samplesize, bursts)

Draw `bursts` randomly placed windows of consecutive samples and apply the same
index selection to both `x` and `y`, returning `(resample(x, inds), resample(y, inds))`.

Window start indices are drawn without replacement from
`1:size(x)[end]-samplesize`; every window covers the indices
`i:i+samplesize`, i.e. `samplesize + 1` samples. Overlapping windows are merged,
so both outputs follow the sorted, unique set of selected indices.

# Arguments
- `x`, `y`: data arrays whose last dimensions must have equal length.
- `samplesize`: offset between the first and last index of a window.
- `bursts`: number of windows to draw.
"""
@inline function burst_sampling(x::AbstractArray, y::AbstractArray, samplesize::Int64, bursts::Int64)
    n = size(x)[end]
    @assert n >= samplesize*bursts "Length of data array too small for subsampling of size $samplesize!"
    @assert n == size(y)[end] "Provide consistent data."
    starts = sample(1:n-samplesize, bursts, replace = false)
    # Accumulate the window indices in a typed buffer instead of splatting a
    # vector of vectors into `vcat`.
    inds = Int64[]
    for i in starts
        append!(inds, i:i+samplesize)
    end
    sort!(unique!(inds))
    return resample(x, inds), resample(y, inds)
end


"""
    burst_sampling(x, t, period, bursts)

Draw `bursts` randomly placed windows of approximate duration `period` from the
data `x` recorded at the time points `t`, returning
`(resample(x, inds), resample(t, inds))`.

Candidate window starts are the time points `τ` with
`t[1] <= τ - period <= t[end] - 2*period`. The bounds are taken relative to
`t[1]`, so the selection does not depend on where the time axis starts. The
number of indices per window is derived from `period` relative to the total time
span, `floor(period/(t[end]-t[1])*length(t))`, which presumes roughly uniform
sampling in `t`. Windows are clamped to the available data and overlapping
windows are merged.

# Arguments
- `x`: data array; its last dimension must match `t` in length.
- `t`: time points of the samples.
- `period`: positive duration of one burst.
- `bursts`: number of bursts (at least one).
"""
@inline function burst_sampling(x::AbstractArray, t::AbstractVector, period::T, bursts::Int64) where T <: AbstractFloat
    @assert period > zero(typeof(period)) "Sampling period has to be positive."
    @assert size(x)[end] == size(t)[end] "Provide consistent data."
    @assert bursts >= 1 "Number of bursts has to be positive."
    @assert t[end]-t[1]>= period*bursts "Bursting impossible. Please provide more data or reduce bursts."
    t_first, t_last = t[1], t[end]
    # Lower bound is anchored at t[1] rather than at an absolute zero, so a
    # shifted (e.g. negative) time axis yields the same candidates.
    mask = t_first .<= t .- period .<= t_last .- 2*period
    candidates = findall(mask)
    samplesize = floor(Int64, period/(t_last - t_first)*length(t))
    starts = sample(candidates, bursts, replace = false)
    n = length(t)
    inds = Int64[]
    for i in starts
        # Clamp so that a window on a non-uniform grid cannot run past the data.
        append!(inds, i:min(i + samplesize, n))
    end
    sort!(unique!(inds))
    return resample(x, inds), resample(t, inds)
end


"""
    subsample(x::AbstractVector, frequency)

Return every `frequency`-th entry of `x`, starting with the first entry.
`frequency` has to be larger than one.
"""
@inline function subsample(x::AbstractVector, frequency::Int64)
    @assert frequency > 1
    picks = 1:frequency:lastindex(x)
    return getindex(x, picks)
end


"""
    subsample(x::AbstractArray, frequency)

Return every `frequency`-th column of `x` (second dimension), starting with the
first column. `frequency` has to be larger than one.
"""
@inline function subsample(x::AbstractArray, frequency::Int64)
    @assert frequency > 1
    cols = 1:frequency:lastindex(x, 2)
    return getindex(x, :, cols)
end

"""
    subsample(x, t, period)

Thin out the data `x` recorded at time points `t` so that consecutive retained
samples are at least `period` apart in time.

The first sample is always kept. Walking through `t` in order, a sample is kept
whenever its time exceeds the time of the previously kept sample by at least
`period`. Returns `(resample(x, idx), resample(t, idx))` for the kept indices.
"""
@inline function subsample(x::AbstractArray, t::AbstractVector, period::T) where T <: AbstractFloat
    @assert period > zero(typeof(period)) "Sampling period has to be positive."
    @assert size(x)[end] == size(t)[end] "Provide consistent data."
    @assert t[end]-t[1]>= period "Subsampling impossible. Sampling period exceeds time window."
    kept = Int64[1]
    t_ref = t[1]  # time of the most recently kept sample
    @inbounds for (k, tk) in enumerate(t)
        (tk - t_ref >= period) || continue
        push!(kept, k)
        t_ref = tk
    end
    x_sub = resample(x, kept)
    t_sub = resample(t, kept)
    return x_sub, t_sub
end

"""
    resample(x::AbstractVector, indx)

Return the entries of the numeric vector `x` at the positions `indx`.
All positions have to lie within `1:length(x)`.
"""
@inline function resample(x::AbstractVector{T}, indx::AbstractArray{Int64}) where {T<:Number}
    @assert maximum(indx) <= length(x)
    @assert minimum(indx) >= 1
    return getindex(x, indx)
end

"""
    resample(x::AbstractMatrix, indx)

Return the columns of the numeric matrix `x` at the positions `indx`.
All positions have to lie within `1:size(x, 2)`.
"""
@inline function resample(x::AbstractMatrix{T}, indx::AbstractArray{Int64}) where {T<:Number}
    @assert maximum(indx) <= size(x, 2)
    @assert minimum(indx) >= 1
    return getindex(x, :, indx)
end
33 changes: 33 additions & 0 deletions test/runtests.jl
Expand Up @@ -291,4 +291,37 @@ end
@test BIC(k, X, Y) == -2*log(sum(abs2, X -Y)) + k*log(size(X)[2])
@test AICC(k, X, Y, likelyhood = (X,Y)->sum(abs, X-Y)) == AIC(k, X, Y, likelyhood = (X,Y)->sum(abs, X-Y))+ 2*(k+1)*(k+2)/(size(X)[2]-k-2)


# Sampling
X = randn(Float64, 2, 100)
t = collect(0:0.1:9.99)
Y = randn(size(X))
xt = burst_sampling(X, 5, 10)
@test 10 <= size(xt)[end] <= 60
@test all([any(xi .≈ X) for xi in eachcol(xt)])
xt, tt = burst_sampling(X, t, 5, 10)
@test all(diff(tt) .> 0.0)
@test size(xt)[end] == size(tt)[end]
@test all([any(xi .≈ X) for xi in eachcol(xt)])
@test !all([any(xi .≈ Y) for xi in eachcol(xt)])
xs, ts = burst_sampling(X, t, 2.0, 1)
@test all([any(xi .≈ X) for xi in eachcol(xs)])
@test size(xs)[end] == size(ts)[end]
@test ts[end]-ts[1] 2.0
X2n = subsample(X, 2)
t2n = subsample(t, 2)
@test size(X2n)[end] == size(t2n)[end]
@test size(X2n)[end] == Int(round(size(X)[end]/2))
@test X2n[:, 1] == X[:, 1]
@test X2n[:, end] == X[:, end-1]
@test all([any(xi .≈ X) for xi in eachcol(X2n)])
xs, ts = subsample(X, t, 0.5)
@test size(xs)[end] == size(ts)[end]
@test size(xs)[1] == size(X)[1]
@test all(diff(ts) .≈ 0.5)
# Loop this a few times to be sure it's right
@test_nowarn for i in 1:20
xs, ts = burst_sampling(X, t, 2.0, 1)
xs, ts = subsample(X, t, 0.5)
end
end

0 comments on commit b4fa3f6

Please sign in to comment.