diff --git a/Project.toml b/Project.toml index f939bba1..20f449aa 100644 --- a/Project.toml +++ b/Project.toml @@ -6,13 +6,10 @@ version = "0.10.5" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CellularAutomata = "878138dc-5b27-11ea-1a71-cb95d38d6b29" -Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d" @@ -29,19 +26,17 @@ Adapt = "4.1.1" Aqua = "0.8" CellularAutomata = "0.0.2" DifferentialEquations = "7.15.0" -Distances = "0.10" LIBSVM = "0.8" LinearAlgebra = "1.10" MLJLinearModels = "0.9.2, 0.10" NNlib = "0.9.26" -PartialFunctions = "1.2" Random = "1.10" Reexport = "1.2.2" SafeTestsets = "0.1" Statistics = "1.10" StatsBase = "0.34.4" Test = "1" -WeightInitializers = "1.0.4" +WeightInitializers = "1.0.5" julia = "1.10" [extras] @@ -51,7 +46,9 @@ LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "Test", "SafeTestsets", "Random", "DifferentialEquations", "MLJLinearModels", "LIBSVM"] +test = ["Aqua", "Test", "SafeTestsets", "Random", "DifferentialEquations", + "MLJLinearModels", "LIBSVM", "Statistics"] diff --git a/src/ReservoirComputing.jl b/src/ReservoirComputing.jl index e5cc8b68..8cf16ea7 100644 --- a/src/ReservoirComputing.jl +++ b/src/ReservoirComputing.jl @@ -1,126 +1,16 @@ module ReservoirComputing -using Adapt -using CellularAutomata -using Distances -using LinearAlgebra -using NNlib -using PartialFunctions -using Random +using Adapt: adapt +using CellularAutomata: CellularAutomaton +using LinearAlgebra: eigvals, mul!, I +using NNlib: fast_act, sigmoid +using Random: Random, AbstractRNG using Reexport: Reexport, @reexport -using Statistics using StatsBase: sample using WeightInitializers: DeviceAgnostic, PartialFunction, Utils @reexport using WeightInitializers -#define global types abstract type AbstractReservoirComputer end -abstract type AbstractOutputLayer end -abstract type AbstractPrediction end -#should probably move some of these -abstract type AbstractGRUVariant end - -#general output layer struct -struct OutputLayer{T, I, S, L} <: AbstractOutputLayer - training_method::T - output_matrix::I - out_size::S - last_value::L -end - -#prediction types -""" - Generative(prediction_len) - -A prediction strategy that enables models to generate autonomous multi-step -forecasts by recursively feeding their own outputs back as inputs for -subsequent prediction steps. - -# Parameters - - - `prediction_len::Int`: The number of future steps to predict. - -# Description - -The `Generative` prediction method allows a model to perform multi-step -forecasting by using its own previous predictions as inputs for future predictions. -This approach is especially useful in time series analysis, where each prediction -depends on the preceding data points. 
- -At each step, the model takes the current input, generates a prediction, -and then incorporates that prediction into the input for the next step. -This recursive process continues until the specified -number of prediction steps (`prediction_len`) is reached. -""" -struct Generative{T} <: AbstractPrediction - prediction_len::T -end - -struct Predictive{I, T} <: AbstractPrediction - prediction_data::I - prediction_len::T -end - -""" - Predictive(prediction_data) - -A prediction strategy for supervised learning tasks, -where a model predicts labels based on a provided set -of input features (`prediction_data`). - -# Parameters - - - `prediction_data`: The input data used for prediction, typically structured as a matrix - where each column represents a sample, and each row represents a feature. - -# Description - -The `Predictive` prediction method is a standard approach -in supervised machine learning tasks. It uses the provided input data -(`prediction_data`) to produce corresponding labels or outputs based -on the learned relationships in the model. Unlike generative prediction, -this method does not recursively feed predictions into the model; -instead, it operates on fixed input data to produce a single batch of predictions. - -This method is suitable for tasks like classification, -regression, or other use cases where the input features -and the number of steps are predefined. -""" -function Predictive(prediction_data) - prediction_len = size(prediction_data, 2) - Predictive(prediction_data, prediction_len) -end - -#fallbacks for initializers #eventually to remove once migrated to WeightInitializers.jl -for initializer in (:rand_sparse, :delay_line, :delay_line_backward, :cycle_jumps, - :simple_cycle, :pseudo_svd, - :scaled_rand, :weighted_init, :informed_init, :minimal_init) - @eval begin - function ($initializer)(dims::Integer...; kwargs...) - return $initializer(Utils.default_rng(), Float32, dims...; kwargs...) - end - function ($initializer)(rng::AbstractRNG, dims::Integer...; kwargs...) - return $initializer(rng, Float32, dims...; kwargs...) - end - function ($initializer)(::Type{T}, dims::Integer...; kwargs...) where {T <: Number} - return $initializer(Utils.default_rng(), T, dims...; kwargs...) - end - - # Partial application - function ($initializer)(rng::AbstractRNG; kwargs...) - return PartialFunction.Partial{Nothing}($initializer, rng, kwargs) - end - function ($initializer)(::Type{T}; kwargs...) where {T <: Number} - return PartialFunction.Partial{T}($initializer, nothing, kwargs) - end - function ($initializer)(rng::AbstractRNG, ::Type{T}; kwargs...) where {T <: Number} - return PartialFunction.Partial{T}($initializer, rng, kwargs) - end - function ($initializer)(; kwargs...) 
- return PartialFunction.Partial{Nothing}($initializer, nothing, kwargs) - end - end -end #general include("states.jl") @@ -130,8 +20,7 @@ include("predict.jl") include("train/linear_regression.jl") #esn -include("esn/esn_input_layers.jl") -include("esn/esn_reservoirs.jl") +include("esn/esn_inits.jl") include("esn/esn_reservoir_drivers.jl") include("esn/esn.jl") include("esn/deepesn.jl") @@ -155,9 +44,7 @@ export scaled_rand, weighted_init, informed_init, minimal_init export rand_sparse, delay_line, delay_line_backward, cycle_jumps, simple_cycle, pseudo_svd export RNN, MRNN, GRU, GRUParams, FullyGated, Minimal export train -export ESN -export HybridESN, KnowledgeModel -export DeepESN +export ESN, HybridESN, KnowledgeModel, DeepESN export RECA, sample export RandomMapping, RandomMaps export Generative, Predictive, OutputLayer diff --git a/src/esn/deepesn.jl b/src/esn/deepesn.jl index 7ce458fd..d9018128 100644 --- a/src/esn/deepesn.jl +++ b/src/esn/deepesn.jl @@ -44,27 +44,22 @@ temporal features. Default is an RNN model. - `nla_type`: The type of non-linear activation used in the reservoir. Default is `NLADefault()`. - - `states_type`: Defines the type of states used in the ESN (e.g., standard states). - Default is `StandardStates()`. - - `washout`: The number of initial timesteps to be discarded in the ESN's training phase. - Default is 0. - - `rng`: Random number generator used for initializing weights. Default is the package's - default random number generator. + - `states_type`: Defines the type of states used in the ESN + (e.g., standard states). Default is `StandardStates()`. + - `washout`: The number of initial timesteps to be discarded + in the ESN's training phase. Default is 0. + - `rng`: Random number generator used for initializing weights. + Default is `Utils.default_rng()`. - `matrix_type`: The type of matrix used for storing the training data. Default is inferred from `train_data`. # Example ```julia -# Prepare your training data -train_data = [your_training_data_here] +train_data = rand(Float32, 3, 100) # Create a DeepESN with specific parameters -deepESN = DeepESN(train_data, 10, 100; depth=3, washout=100) - -# Proceed with training and prediction (pseudocode) -train(deepESN, target_data) -prediction = predict(deepESN, new_data) +deepESN = DeepESN(train_data, 3, 100; depth=3, washout=100) ``` """ function DeepESN(train_data, @@ -82,7 +77,7 @@ function DeepESN(train_data, matrix_type=typeof(train_data)) if states_type isa AbstractPaddedStates in_size = size(train_data, 1) + 1 - train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))), + train_data = vcat(adapt(matrix_type, ones(1, size(train_data, 2))), train_data) end diff --git a/src/esn/esn.jl b/src/esn/esn.jl index b585db2a..07824cc4 100644 --- a/src/esn/esn.jl +++ b/src/esn/esn.jl @@ -15,33 +15,41 @@ end """ ESN(train_data; kwargs...) -> ESN -Creates an Echo State Network (ESN) using specified parameters and training data, suitable for various machine learning tasks. +Creates an Echo State Network (ESN). -# Parameters +# Arguments - - `train_data`: Matrix of training data (columns as time steps, rows as features). + - `train_data`: Matrix of training data `num_features x time_steps`. - `variation`: Variation of ESN (default: `Default()`). - `input_layer`: Input layer of ESN. - `reservoir`: Reservoir of the ESN. - `bias`: Bias vector for each time step. + - `rng`: Random number generator used for initializing weights. + Default is `Utils.default_rng()`. 
- `reservoir_driver`: Mechanism for evolving reservoir states (default: `RNN()`). - `nla_type`: Non-linear activation type (default: `NLADefault()`). - `states_type`: Format for storing states (default: `StandardStates()`). - `washout`: Initial time steps to discard (default: `0`). - `matrix_type`: Type of matrices used internally (default: type of `train_data`). -# Returns - - - An initialized ESN instance with specified parameters. - # Examples -```julia -using ReservoirComputing - -train_data = rand(10, 100) # 10 features, 100 time steps - -esn = ESN(train_data; reservoir=RandSparseReservoir(200), washout=10) +```jldoctest +julia> train_data = rand(Float32, 10, 100) # 10 features, 100 time steps +10×100 Matrix{Float32}: + 0.567676 0.154756 0.584611 0.294015 … 0.573946 0.894333 0.429133 + 0.327073 0.729521 0.804667 0.263944 0.559342 0.020167 0.897862 + 0.453606 0.800058 0.568311 0.749441 0.0713146 0.464795 0.532854 + 0.0173253 0.536959 0.722116 0.910328 0.00224048 0.00202501 0.631075 + 0.366744 0.119761 0.100593 0.125122 0.700562 0.675474 0.102947 + 0.539737 0.768351 0.54681 0.648672 … 0.256738 0.223784 0.94327 + 0.558099 0.42676 0.1948 0.735625 0.0989234 0.119342 0.624182 + 0.0603135 0.929999 0.263439 0.0372732 0.066125 0.332769 0.25562 + 0.4463 0.334423 0.444679 0.311695 0.0494497 0.27171 0.214925 + 0.987182 0.898593 0.295241 0.233098 0.789699 0.453692 0.759205 + +julia> esn = ESN(train_data, 10, 300; washout=10) +ESN(10 => 300) ``` """ function ESN(train_data, @@ -58,7 +66,7 @@ function ESN(train_data, matrix_type=typeof(train_data)) if states_type isa AbstractPaddedStates in_size = size(train_data, 1) + 1 - train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))), + train_data = vcat(adapt(matrix_type, ones(1, size(train_data, 2))), train_data) end @@ -86,6 +94,10 @@ function (esn::AbstractEchoStateNetwork)(prediction::AbstractPrediction, kwargs...) end +function Base.show(io::IO, esn::ESN) + print(io, "ESN(", size(esn.train_data, 1), " => ", size(esn.reservoir_matrix, 1), ")") +end + #training dispatch on esn """ train(esn::AbstractEchoStateNetwork, target_data, training_method = StandardRidge(0.0)) @@ -98,27 +110,27 @@ Trains an Echo State Network (ESN) using the provided target data and a specifie - `target_data`: Supervised training data for the ESN. - `training_method`: The method for training the ESN (default: `StandardRidge(0.0)`). -# Returns - - - The trained ESN model. Its type and structure depend on `training_method` and the ESN's implementation. - -# Returns - -The trained ESN model. The exact type and structure of the return value depends on the -`training_method` and the specific ESN implementation. 
- -```julia -using ReservoirComputing - -# Initialize an ESN instance and target data -esn = ESN(train_data; reservoir=RandSparseReservoir(200), washout=10) -target_data = rand(size(train_data, 2)) - -# Train the ESN using the default training method -trained_esn = train(esn, target_data) - -# Train the ESN using a custom training method -trained_esn = train(esn, target_data; training_method=StandardRidge(1.0)) +# Example + +```jldoctest +julia> train_data = rand(Float32, 10, 100) # 10 features, 100 time steps +10×100 Matrix{Float32}: + 0.11437 0.425367 0.585867 0.34078 … 0.0531493 0.761425 0.883164 + 0.301373 0.497806 0.279603 0.802417 0.49873 0.270156 0.333333 + 0.135224 0.660179 0.394233 0.512753 0.901221 0.784377 0.687691 + 0.510203 0.877234 0.614245 0.978405 0.332775 0.768826 0.527077 + 0.955027 0.398322 0.312156 0.981938 0.473357 0.156704 0.476101 + 0.353024 0.997632 0.164328 0.470783 … 0.745613 0.85797 0.465201 + 0.966044 0.194299 0.599167 0.040475 0.0996013 0.325959 0.770103 + 0.292068 0.495138 0.481299 0.214566 0.819573 0.155951 0.227168 + 0.133498 0.451058 0.0761995 0.90421 0.994212 0.332164 0.545112 + 0.214467 0.791524 0.124105 0.951805 0.947166 0.954244 0.889733 + +julia> esn = ESN(train_data, 10, 300; washout=10) +ESN(10 => 300) + +julia> output_layer = train(esn, rand(Float32, 3, 90)) +OutputLayer successfully trained with output size: 3 ``` """ function train(esn::AbstractEchoStateNetwork, diff --git a/src/esn/esn_inits.jl b/src/esn/esn_inits.jl new file mode 100644 index 00000000..929d23f3 --- /dev/null +++ b/src/esn/esn_inits.jl @@ -0,0 +1,688 @@ +### input layers +""" + scaled_rand([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + scaling=0.1) + +Create and return a matrix with random values, uniformly distributed within +a range defined by `scaling`. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the matrix. Should follow `res_size x in_size`. + - `scaling`: A scaling factor to define the range of the uniform distribution. + The matrix elements will be randomly chosen from the + range `[-scaling, scaling]`. Defaults to `0.1`. + +# Examples + +```jldoctest +julia> res_input = scaled_rand(8, 3) +8×3 Matrix{Float32}: + -0.0669356 -0.0292692 -0.0188943 + 0.0159724 0.004071 -0.0737949 + 0.026355 -0.0191563 0.0714962 + -0.0177412 0.0279123 0.0892906 + -0.0184405 0.0567368 0.0190222 + 0.0944272 0.0679244 0.0148647 + -0.0799005 -0.0891089 -0.0444782 + -0.0970182 0.0934286 0.03553 +``` +""" +function scaled_rand(rng::AbstractRNG, ::Type{T}, dims::Integer...; + scaling=T(0.1)) where {T <: Number} + res_size, in_size = dims + layer_matrix = (DeviceAgnostic.rand(rng, T, res_size, in_size) .- T(0.5)) .* + (T(2) * scaling) + return layer_matrix +end + +""" + weighted_init([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + scaling=0.1) + +Create and return a matrix representing a weighted input layer. +This initializer generates a weighted input matrix with random non-zero +elements distributed uniformly within the range [-`scaling`, `scaling`] [^Lu2017]. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the matrix. Should follow `res_size x in_size`. + - `scaling`: The scaling factor for the weight distribution. 
+ Defaults to `0.1`. + +# Examples + +```jldoctest +julia> res_input = weighted_init(8, 3) +6×3 Matrix{Float32}: + 0.0452399 0.0 0.0 + -0.0348047 0.0 0.0 + 0.0 -0.0386004 0.0 + 0.0 0.00981022 0.0 + 0.0 0.0 0.0577838 + 0.0 0.0 -0.0562827 +``` + +[^Lu2017]: Lu, Zhixin, et al. + "Reservoir observers: Model-free inference of unmeasured variables in + chaotic systems." + Chaos: An Interdisciplinary Journal of Nonlinear Science 27.4 (2017): 041102. +""" +function weighted_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; + scaling=T(0.1)) where {T <: Number} + approx_res_size, in_size = dims + res_size = Int(floor(approx_res_size / in_size) * in_size) + layer_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) + q = floor(Int, res_size / in_size) + + for i in 1:in_size + layer_matrix[((i - 1) * q + 1):((i) * q), i] = (DeviceAgnostic.rand(rng, T, q) .- + T(0.5)) .* (T(2) * scaling) + end + + return layer_matrix +end + +""" + informed_init([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + scaling=0.1, model_in_size, gamma=0.5) + +Create an input layer for informed echo state networks [^Pathak2018]. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the matrix. Should follow `res_size x in_size`. + - `scaling`: The scaling factor for the input matrix. + Default is 0.1. + - `model_in_size`: The size of the input model. + - `gamma`: The gamma value. Default is 0.5. + +# Examples + +[^Pathak2018]: Pathak, Jaideep, et al. "Hybrid forecasting of chaotic processes: + Using machine learning in conjunction with a knowledge-based model." + Chaos: An Interdisciplinary Journal of Nonlinear Science 28.4 (2018). +""" +function informed_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; + scaling=T(0.1), model_in_size, gamma=T(0.5)) where {T <: Number} + res_size, in_size = dims + state_size = in_size - model_in_size + + if state_size <= 0 + throw(DimensionMismatch("in_size must be greater than model_in_size")) + end + + input_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) + zero_connections = DeviceAgnostic.zeros(rng, T, in_size) + num_for_state = floor(Int, res_size * gamma) + num_for_model = floor(Int, res_size * (1 - gamma)) + + for i in 1:num_for_state + idxs = findall(Bool[zero_connections .== input_matrix[i, :] + for i in 1:size(input_matrix, 1)]) + random_row_idx = idxs[DeviceAgnostic.rand(rng, T, 1:end)] + random_clm_idx = range(1, state_size; step=1)[DeviceAgnostic.rand(rng, T, 1:end)] + input_matrix[random_row_idx, random_clm_idx] = (DeviceAgnostic.rand(rng, T) - + T(0.5)) .* (T(2) * scaling) + end + + for i in 1:num_for_model + idxs = findall(Bool[zero_connections .== input_matrix[i, :] + for i in 1:size(input_matrix, 1)]) + random_row_idx = idxs[DeviceAgnostic.rand(rng, T, 1:end)] + random_clm_idx = range(state_size + 1, in_size; step=1)[DeviceAgnostic.rand( + rng, T, 1:end)] + input_matrix[random_row_idx, random_clm_idx] = (DeviceAgnostic.rand(rng, T) - + T(0.5)) .* (T(2) * scaling) + end + + return input_matrix +end + +""" + minimal_init([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + sampling_type=:bernoulli, weight=0.1, irrational=pi, start=1, p=0.5) + +Create a layer matrix with uniform weights determined by `weight`. The sign difference +is randomly determined by the `sampling` chosen. + +# Arguments + + - `rng`: Random number generator. 
Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the matrix. Should follow `res_size x in_size`. + - `weight`: The weight used to fill the layer matrix. Default is 0.1. + - `sampling_type`: The sampling parameters used to generate the input matrix. + Default is `:bernoulli`. + - `irrational`: Irrational number chosen for sampling if `sampling_type=:irrational`. + Default is `pi`. + - `start`: Starting value for the irrational sample. Default is 1 + - `p`: Probability for the Bernoulli sampling. Lower probability increases negative + value. Higher probability increases positive values. Default is 0.5 + +# Examples + +```jldoctest +julia> res_input = minimal_init(8, 3) +8×3 Matrix{Float32}: + 0.1 -0.1 0.1 + -0.1 0.1 0.1 + -0.1 -0.1 0.1 + -0.1 -0.1 -0.1 + 0.1 0.1 0.1 + -0.1 -0.1 -0.1 + -0.1 -0.1 0.1 + 0.1 -0.1 0.1 + +julia> res_input = minimal_init(8, 3; sampling_type=:irrational) +8×3 Matrix{Float32}: + -0.1 0.1 -0.1 + 0.1 -0.1 -0.1 + 0.1 0.1 -0.1 + 0.1 0.1 0.1 + -0.1 -0.1 -0.1 + 0.1 0.1 0.1 + 0.1 0.1 -0.1 + -0.1 0.1 -0.1 + +julia> res_input = minimal_init(8, 3; p=0.1) # lower p -> more negative signs +8×3 Matrix{Float32}: + -0.1 -0.1 -0.1 + -0.1 -0.1 -0.1 + -0.1 -0.1 -0.1 + -0.1 -0.1 -0.1 + 0.1 -0.1 -0.1 + -0.1 -0.1 -0.1 + -0.1 -0.1 -0.1 + -0.1 -0.1 -0.1 + +julia> res_input = minimal_init(8, 3; p=0.8)# higher p -> more positive signs +8×3 Matrix{Float32}: + 0.1 0.1 0.1 + -0.1 0.1 0.1 + -0.1 0.1 0.1 + 0.1 0.1 0.1 + 0.1 0.1 0.1 + 0.1 -0.1 0.1 + -0.1 0.1 0.1 + 0.1 0.1 0.1 +``` +""" +function minimal_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; + sampling_type::Symbol=:bernoulli, weight::Number=T(0.1), irrational::Real=pi, + start::Int=1, p::Number=T(0.5)) where {T <: Number} + res_size, in_size = dims + if sampling_type == :bernoulli + layer_matrix = _create_bernoulli(p, res_size, in_size, weight, rng, T) + elseif sampling_type == :irrational + layer_matrix = _create_irrational(irrational, + start, + res_size, + in_size, + weight, + rng, + T) + else + error("Sampling type not allowed. Please use one of :bernoulli or :irrational") + end + return layer_matrix +end + +function _create_bernoulli(p::Number, res_size::Int, in_size::Int, weight::Number, + rng::AbstractRNG, ::Type{T}) where {T <: Number} + input_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) + for i in 1:res_size + for j in 1:in_size + if DeviceAgnostic.rand(rng, T) < p + input_matrix[i, j] = weight + else + input_matrix[i, j] = -weight + end + end + end + return input_matrix +end + +function _create_irrational(irrational::Irrational, start::Int, res_size::Int, + in_size::Int, weight::Number, rng::AbstractRNG, + ::Type{T}) where {T <: Number} + setprecision(BigFloat, Int(ceil(log2(10) * (res_size * in_size + start + 1)))) + ir_string = string(BigFloat(irrational)) |> collect + deleteat!(ir_string, findall(x -> x == '.', ir_string)) + ir_array = DeviceAgnostic.zeros(rng, T, length(ir_string)) + input_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) + + for i in 1:length(ir_string) + ir_array[i] = parse(Int, ir_string[i]) + end + + for i in 1:res_size + for j in 1:in_size + random_number = DeviceAgnostic.rand(rng, T) + input_matrix[i, j] = random_number < 0.5 ? 
-weight : weight + end + end + + return T.(input_matrix) +end + +### reservoirs + +""" + rand_sparse([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + radius=1.0, sparsity=0.1, std=1.0) + +Create and return a random sparse reservoir matrix. +The matrix will be of size specified by `dims`, with specified `sparsity` +and scaled spectral radius according to `radius`. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the reservoir matrix. + - `radius`: The desired spectral radius of the reservoir. + Defaults to 1.0. + - `sparsity`: The sparsity level of the reservoir matrix, + controlling the fraction of zero elements. Defaults to 0.1. + +# Examples + +```jldoctest +julia> res_matrix = rand_sparse(5, 5; sparsity=0.5) +5×5 Matrix{Float32}: + 0.0 0.0 0.0 0.0 0.0 + 0.0 0.794565 0.0 0.26164 0.0 + 0.0 0.0 -0.931294 0.0 0.553706 + 0.723235 -0.524727 0.0 0.0 0.0 + 1.23723 0.0 0.181824 -1.5478 0.465328 +``` +""" +function rand_sparse(rng::AbstractRNG, ::Type{T}, dims::Integer...; + radius=T(1.0), sparsity=T(0.1), std=T(1.0)) where {T <: Number} + lcl_sparsity = T(1) - sparsity #consistency with current implementations + reservoir_matrix = sparse_init(rng, T, dims...; sparsity=lcl_sparsity, std=std) + rho_w = maximum(abs.(eigvals(reservoir_matrix))) + reservoir_matrix .*= radius / rho_w + if Inf in unique(reservoir_matrix) || -Inf in unique(reservoir_matrix) + error("Sparsity too low for size of the matrix. Increase res_size or increase sparsity") + end + return reservoir_matrix +end + +""" + delay_line([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + weight=0.1) + +Create and return a delay line reservoir matrix [^Rodan2010]. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the reservoir matrix. + - `weight`: Determines the value of all connections in the reservoir. + Default is 0.1. + +# Examples + +```jldoctest +julia> res_matrix = delay_line(5, 5) +5×5 Matrix{Float32}: + 0.0 0.0 0.0 0.0 0.0 + 0.1 0.0 0.0 0.0 0.0 + 0.0 0.1 0.0 0.0 0.0 + 0.0 0.0 0.1 0.0 0.0 + 0.0 0.0 0.0 0.1 0.0 + +julia> res_matrix = delay_line(5, 5; weight=1) +5×5 Matrix{Float32}: + 0.0 0.0 0.0 0.0 0.0 + 1.0 0.0 0.0 0.0 0.0 + 0.0 1.0 0.0 0.0 0.0 + 0.0 0.0 1.0 0.0 0.0 + 0.0 0.0 0.0 1.0 0.0 +``` + +[^Rodan2010]: Rodan, Ali, and Peter Tino. "Minimum complexity echo state network." + IEEE transactions on neural networks 22.1 (2010): 131-144. +""" +function delay_line(rng::AbstractRNG, ::Type{T}, dims::Integer...; + weight=T(0.1)) where {T <: Number} + reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) + @assert length(dims) == 2&&dims[1] == dims[2] "The dimensions + must define a square matrix (e.g., (100, 100))" + + for i in 1:(dims[1] - 1) + reservoir_matrix[i + 1, i] = weight + end + + return reservoir_matrix +end + +""" + delay_line_backward([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + weight = 0.1, fb_weight = 0.2) + +Create a delay line backward reservoir with the specified by `dims` and weights. +Creates a matrix with backward connections as described in [^Rodan2010]. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. 
+ Default is `Float32`. + - `dims`: Dimensions of the reservoir matrix. + - `weight`: The weight determines the absolute value of + forward connections in the reservoir. Default is 0.1 + - `fb_weight`: Determines the absolute value of backward connections + in the reservoir. Default is 0.2 + +# Examples + +```jldoctest +julia> res_matrix = delay_line_backward(5, 5) +5×5 Matrix{Float32}: + 0.0 0.2 0.0 0.0 0.0 + 0.1 0.0 0.2 0.0 0.0 + 0.0 0.1 0.0 0.2 0.0 + 0.0 0.0 0.1 0.0 0.2 + 0.0 0.0 0.0 0.1 0.0 + +julia> res_matrix = delay_line_backward(Float16, 5, 5) +5×5 Matrix{Float16}: + 0.0 0.2 0.0 0.0 0.0 + 0.1 0.0 0.2 0.0 0.0 + 0.0 0.1 0.0 0.2 0.0 + 0.0 0.0 0.1 0.0 0.2 + 0.0 0.0 0.0 0.1 0.0 +``` + +[^Rodan2010]: Rodan, Ali, and Peter Tino. "Minimum complexity echo state network." + IEEE transactions on neural networks 22.1 (2010): 131-144. +""" +function delay_line_backward(rng::AbstractRNG, ::Type{T}, dims::Integer...; + weight=T(0.1), fb_weight=T(0.2)) where {T <: Number} + res_size = first(dims) + reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) + + for i in 1:(res_size - 1) + reservoir_matrix[i + 1, i] = weight + reservoir_matrix[i, i + 1] = fb_weight + end + + return reservoir_matrix +end + +""" + cycle_jumps([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + cycle_weight = 0.1, jump_weight = 0.1, jump_size = 3) + +Create a cycle jumps reservoir with the specified dimensions, +cycle weight, jump weight, and jump size. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the reservoir matrix. + - `cycle_weight`: The weight of cycle connections. + Default is 0.1. + - `jump_weight`: The weight of jump connections. + Default is 0.1. + - `jump_size`: The number of steps between jump connections. + Default is 3. + +# Examples + +```jldoctest +julia> res_matrix = cycle_jumps(5, 5) +5×5 Matrix{Float32}: + 0.0 0.0 0.0 0.1 0.1 + 0.1 0.0 0.0 0.0 0.0 + 0.0 0.1 0.0 0.0 0.0 + 0.1 0.0 0.1 0.0 0.0 + 0.0 0.0 0.0 0.1 0.0 + +julia> res_matrix = cycle_jumps(5, 5; jump_size=2) +5×5 Matrix{Float32}: + 0.0 0.0 0.1 0.0 0.1 + 0.1 0.0 0.0 0.0 0.0 + 0.1 0.1 0.0 0.0 0.1 + 0.0 0.0 0.1 0.0 0.0 + 0.0 0.0 0.1 0.1 0.0 +``` + +[^Rodan2012]: Rodan, Ali, and Peter Tiňo. "Simple deterministically constructed cycle reservoirs + with regular jumps." Neural computation 24.7 (2012): 1822-1852. +""" +function cycle_jumps(rng::AbstractRNG, ::Type{T}, dims::Integer...; + cycle_weight::Number=T(0.1), jump_weight::Number=T(0.1), + jump_size::Int=3) where {T <: Number} + res_size = first(dims) + reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) + + for i in 1:(res_size - 1) + reservoir_matrix[i + 1, i] = cycle_weight + end + + reservoir_matrix[1, res_size] = cycle_weight + + for i in 1:jump_size:(res_size - jump_size) + tmp = (i + jump_size) % res_size + if tmp == 0 + tmp = res_size + end + reservoir_matrix[i, tmp] = jump_weight + reservoir_matrix[tmp, i] = jump_weight + end + + return reservoir_matrix +end + +""" + simple_cycle([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + weight = 0.1) + +Create a simple cycle reservoir with the specified dimensions and weight. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. Default is `Float32`. + - `dims`: Dimensions of the reservoir matrix. 
+ - `weight`: Weight of the connections in the reservoir matrix. + Default is 0.1. + +# Examples + +```jldoctest +julia> res_matrix = simple_cycle(5, 5) +5×5 Matrix{Float32}: + 0.0 0.0 0.0 0.0 0.1 + 0.1 0.0 0.0 0.0 0.0 + 0.0 0.1 0.0 0.0 0.0 + 0.0 0.0 0.1 0.0 0.0 + 0.0 0.0 0.0 0.1 0.0 + +julia> res_matrix = simple_cycle(5, 5; weight=11) +5×5 Matrix{Float32}: + 0.0 0.0 0.0 0.0 11.0 + 11.0 0.0 0.0 0.0 0.0 + 0.0 11.0 0.0 0.0 0.0 + 0.0 0.0 11.0 0.0 0.0 + 0.0 0.0 0.0 11.0 0.0 +``` + +[^Rodan2010]: Rodan, Ali, and Peter Tino. "Minimum complexity echo state network." + IEEE transactions on neural networks 22.1 (2010): 131-144. +""" +function simple_cycle(rng::AbstractRNG, ::Type{T}, dims::Integer...; + weight=T(0.1)) where {T <: Number} + reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) + + for i in 1:(dims[1] - 1) + reservoir_matrix[i + 1, i] = weight + end + + reservoir_matrix[1, dims[1]] = weight + return reservoir_matrix +end + +""" + pseudo_svd([rng::AbstractRNG=Utils.default_rng()], [T=Float32], dims...; + max_value=1.0, sparsity=0.1, sorted = true, reverse_sort = false) + +Returns an initializer to build a sparse reservoir matrix with the given +`sparsity` by using a pseudo-SVD approach as described in [^yang]. + +# Arguments + + - `rng`: Random number generator. Default is `Utils.default_rng()` + from WeightInitializers. + - `T`: Type of the elements in the reservoir matrix. + Default is `Float32`. + - `dims`: Dimensions of the reservoir matrix. + - `max_value`: The maximum absolute value of elements in the matrix. + Default is 1.0 + - `sparsity`: The desired sparsity level of the reservoir matrix. + Default is 0.1 + - `sorted`: A boolean indicating whether to sort the singular values before + creating the diagonal matrix. Default is `true`. + - `reverse_sort`: A boolean indicating whether to reverse the sorted + singular values. Default is `false`. + +# Examples + +```jldoctest +julia> res_matrix = pseudo_svd(5, 5) +5×5 Matrix{Float32}: + 0.306998 0.0 0.0 0.0 0.0 + 0.0 0.325977 0.0 0.0 0.0 + 0.0 0.0 0.549051 0.0 0.0 + 0.0 0.0 0.0 0.726199 0.0 + 0.0 0.0 0.0 0.0 1.0 +``` + +[^yang]: Yang, Cuili, et al. "_Design of polynomial echo state networks for time series prediction._" Neurocomputing 290 (2018): 148-160. 
+""" +function pseudo_svd(rng::AbstractRNG, ::Type{T}, dims::Integer...; + max_value::Number=T(1.0), sparsity::Number=0.1, sorted::Bool=true, + reverse_sort::Bool=false) where {T <: Number} + reservoir_matrix = create_diag(rng, T, dims[1], + max_value; + sorted=sorted, + reverse_sort=reverse_sort) + tmp_sparsity = get_sparsity(reservoir_matrix, dims[1]) + + while tmp_sparsity <= sparsity + reservoir_matrix *= create_qmatrix(rng, T, dims[1], + rand_range(rng, T, dims[1]), + rand_range(rng, T, dims[1]), + DeviceAgnostic.rand(rng, T) * T(2) - T(1)) + tmp_sparsity = get_sparsity(reservoir_matrix, dims[1]) + end + + return reservoir_matrix +end + +#hacky workaround for the moment +function rand_range(rng, T, n::Int) + return Int(1 + floor(DeviceAgnostic.rand(rng, T) * n)) +end + +function create_diag(rng::AbstractRNG, ::Type{T}, dim::Number, max_value::Number; + sorted::Bool=true, reverse_sort::Bool=false) where {T <: Number} + diagonal_matrix = DeviceAgnostic.zeros(rng, T, dim, dim) + if sorted == true + if reverse_sort == true + diagonal_values = sort( + DeviceAgnostic.rand(rng, T, dim) .* max_value; rev=true) + diagonal_values[1] = max_value + else + diagonal_values = sort(DeviceAgnostic.rand(rng, T, dim) .* max_value) + diagonal_values[end] = max_value + end + else + diagonal_values = DeviceAgnostic.rand(rng, T, dim) .* max_value + end + + for i in 1:dim + diagonal_matrix[i, i] = diagonal_values[i] + end + + return diagonal_matrix +end + +function create_qmatrix(rng::AbstractRNG, ::Type{T}, dim::Number, + coord_i::Number, coord_j::Number, theta::Number) where {T <: Number} + qmatrix = DeviceAgnostic.zeros(rng, T, dim, dim) + + for i in 1:dim + qmatrix[i, i] = 1.0 + end + + qmatrix[coord_i, coord_i] = cos(theta) + qmatrix[coord_j, coord_j] = cos(theta) + qmatrix[coord_i, coord_j] = -sin(theta) + qmatrix[coord_j, coord_i] = sin(theta) + return qmatrix +end + +function get_sparsity(M, dim) + return size(M[M .!= 0], 1) / (dim * dim - size(M[M .!= 0], 1)) #nonzero/zero elements +end + +### fallbacks +#fallbacks for initializers #eventually to remove once migrated to WeightInitializers.jl +for initializer in (:rand_sparse, :delay_line, :delay_line_backward, :cycle_jumps, + :simple_cycle, :pseudo_svd, + :scaled_rand, :weighted_init, :informed_init, :minimal_init) + @eval begin + function ($initializer)(dims::Integer...; kwargs...) + return $initializer(Utils.default_rng(), Float32, dims...; kwargs...) + end + function ($initializer)(rng::AbstractRNG, dims::Integer...; kwargs...) + return $initializer(rng, Float32, dims...; kwargs...) + end + function ($initializer)(::Type{T}, dims::Integer...; kwargs...) where {T <: Number} + return $initializer(Utils.default_rng(), T, dims...; kwargs...) + end + + # Partial application + function ($initializer)(rng::AbstractRNG; kwargs...) + return PartialFunction.Partial{Nothing}($initializer, rng, kwargs) + end + function ($initializer)(::Type{T}; kwargs...) where {T <: Number} + return PartialFunction.Partial{T}($initializer, nothing, kwargs) + end + function ($initializer)(rng::AbstractRNG, ::Type{T}; kwargs...) where {T <: Number} + return PartialFunction.Partial{T}($initializer, rng, kwargs) + end + function ($initializer)(; kwargs...) 
+ return PartialFunction.Partial{Nothing}($initializer, nothing, kwargs) + end + end +end diff --git a/src/esn/esn_input_layers.jl b/src/esn/esn_input_layers.jl deleted file mode 100644 index 04487f96..00000000 --- a/src/esn/esn_input_layers.jl +++ /dev/null @@ -1,240 +0,0 @@ -""" - scaled_rand(rng::AbstractRNG, ::Type{T}, dims::Integer...; scaling=T(0.1)) where {T <: Number} - -Create and return a matrix with random values, uniformly distributed within a range defined by `scaling`. This function is useful for initializing matrices, such as the layers of a neural network, with scaled random values. - -# Arguments - - - `rng`: An instance of `AbstractRNG` for random number generation. - - `T`: The data type for the elements of the matrix. - - `dims`: Dimensions of the matrix. It must be a 2-element tuple specifying the number of rows and columns (e.g., `(res_size, in_size)`). - - `scaling`: A scaling factor to define the range of the uniform distribution. The matrix elements will be randomly chosen from the range `[-scaling, scaling]`. Defaults to `T(0.1)`. - -# Returns - -A matrix of type with dimensions specified by `dims`. Each element of the matrix is a random number uniformly distributed between `-scaling` and `scaling`. - -# Example - -```julia -rng = Random.default_rng() -matrix = scaled_rand(rng, Float64, (100, 50); scaling=0.2) -``` -""" -function scaled_rand(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - scaling=T(0.1)) where {T <: Number} - res_size, in_size = dims - layer_matrix = (DeviceAgnostic.rand(rng, T, res_size, in_size) .- T(0.5)) .* - (T(2) * scaling) - return layer_matrix -end - -""" - weighted_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; scaling=T(0.1)) where {T <: Number} - -Create and return a matrix representing a weighted input layer for Echo State Networks (ESNs). This initializer generates a weighted input matrix with random non-zero elements distributed uniformly within the range [-`scaling`, `scaling`], inspired by the approach in [^Lu]. - -# Arguments - - - `rng`: An instance of `AbstractRNG` for random number generation. - - `T`: The data type for the elements of the matrix. - - `dims`: A 2-element tuple specifying the approximate reservoir size and input size (e.g., `(approx_res_size, in_size)`). - - `scaling`: The scaling factor for the weight distribution. Defaults to `T(0.1)`. - -# Returns - -A matrix representing the weighted input layer as defined in [^Lu2017]. The matrix dimensions will be adjusted to ensure each input unit connects to an equal number of reservoir units. - -# Example - -```julia -rng = Random.default_rng() -input_layer = weighted_init(rng, Float64, (3, 300); scaling=0.2) -``` - -# References - -[^Lu2017]: Lu, Zhixin, et al. - "Reservoir observers: Model-free inference of unmeasured variables in chaotic systems." - Chaos: An Interdisciplinary Journal of Nonlinear Science 27.4 (2017): 041102. 
-""" -function weighted_init(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - scaling=T(0.1)) where {T <: Number} - approx_res_size, in_size = dims - res_size = Int(floor(approx_res_size / in_size) * in_size) - layer_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) - q = floor(Int, res_size / in_size) - - for i in 1:in_size - layer_matrix[((i - 1) * q + 1):((i) * q), i] = (DeviceAgnostic.rand(rng, T, q) .- - T(0.5)) .* (T(2) * scaling) - end - - return layer_matrix -end - -""" - informed_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; scaling=T(0.1), model_in_size, gamma=T(0.5)) where {T <: Number} - -Create a layer of a neural network. - -# Arguments - - - `rng::AbstractRNG`: The random number generator. - - `T::Type`: The data type. - - `dims::Integer...`: The dimensions of the layer. - - `scaling::T = T(0.1)`: The scaling factor for the input matrix. - - `model_in_size`: The size of the input model. - - `gamma::T = T(0.5)`: The gamma value. - -# Returns - - - `input_matrix`: The created input matrix for the layer. - -# Example - -```julia -rng = Random.default_rng() -dims = (100, 200) -model_in_size = 50 -input_matrix = informed_init(rng, Float64, dims; model_in_size=model_in_size) -``` -""" -function informed_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; - scaling=T(0.1), model_in_size, gamma=T(0.5)) where {T <: Number} - res_size, in_size = dims - state_size = in_size - model_in_size - - if state_size <= 0 - throw(DimensionMismatch("in_size must be greater than model_in_size")) - end - - input_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) - zero_connections = DeviceAgnostic.zeros(rng, T, in_size) - num_for_state = floor(Int, res_size * gamma) - num_for_model = floor(Int, res_size * (1 - gamma)) - - for i in 1:num_for_state - idxs = findall(Bool[zero_connections .== input_matrix[i, :] - for i in 1:size(input_matrix, 1)]) - random_row_idx = idxs[DeviceAgnostic.rand(rng, T, 1:end)] - random_clm_idx = range(1, state_size; step=1)[DeviceAgnostic.rand(rng, T, 1:end)] - input_matrix[random_row_idx, random_clm_idx] = (DeviceAgnostic.rand(rng, T) - - T(0.5)) .* (T(2) * scaling) - end - - for i in 1:num_for_model - idxs = findall(Bool[zero_connections .== input_matrix[i, :] - for i in 1:size(input_matrix, 1)]) - random_row_idx = idxs[DeviceAgnostic.rand(rng, T, 1:end)] - random_clm_idx = range(state_size + 1, in_size; step=1)[DeviceAgnostic.rand( - rng, T, 1:end)] - input_matrix[random_row_idx, random_clm_idx] = (DeviceAgnostic.rand(rng, T) - - T(0.5)) .* (T(2) * scaling) - end - - return input_matrix -end - -""" - irrational_sample_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; - weight = 0.1, - sampling = IrrationalSample(; irrational = pi, start = 1) - ) where {T <: Number} - -Create a layer matrix using the provided random number generator and sampling parameters. - -# Arguments - - - `rng::AbstractRNG`: The random number generator used to generate random numbers. - - `dims::Integer...`: The dimensions of the layer matrix. - - `weight`: The weight used to fill the layer matrix. Default is 0.1. - - `sampling`: The sampling parameters used to generate the input matrix. Default is IrrationalSample(irrational = pi, start = 1). - -# Returns - -The layer matrix generated using the provided random number generator and sampling parameters. 
- -# Example - -```julia -using Random -rng = Random.default_rng() -dims = (3, 2) -weight = 0.5 -layer_matrix = irrational_sample_init(rng, Float64, dims; weight=weight, - sampling=IrrationalSample(; irrational=sqrt(2), start=1)) -``` -""" -function minimal_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; - sampling_type::Symbol=:bernoulli, - weight::Number=T(0.1), - irrational::Real=pi, - start::Int=1, - p::Number=T(0.5)) where {T <: Number} - res_size, in_size = dims - if sampling_type == :bernoulli - layer_matrix = _create_bernoulli(p, res_size, in_size, weight, rng, T) - elseif sampling_type == :irrational - layer_matrix = _create_irrational(irrational, - start, - res_size, - in_size, - weight, - rng, - T) - else - error("Sampling type not allowed. Please use one of :bernoulli or :irrational") - end - return layer_matrix -end - -function _create_bernoulli(p::Number, - res_size::Int, - in_size::Int, - weight::Number, - rng::AbstractRNG, - ::Type{T}) where {T <: Number} - input_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) - for i in 1:res_size - for j in 1:in_size - if DeviceAgnostic.rand(rng, T) < p - input_matrix[i, j] = weight - else - input_matrix[i, j] = -weight - end - end - end - return input_matrix -end - -function _create_irrational(irrational::Irrational, - start::Int, - res_size::Int, - in_size::Int, - weight::Number, - rng::AbstractRNG, - ::Type{T}) where {T <: Number} - setprecision(BigFloat, Int(ceil(log2(10) * (res_size * in_size + start + 1)))) - ir_string = string(BigFloat(irrational)) |> collect - deleteat!(ir_string, findall(x -> x == '.', ir_string)) - ir_array = DeviceAgnostic.zeros(rng, T, length(ir_string)) - input_matrix = DeviceAgnostic.zeros(rng, T, res_size, in_size) - - for i in 1:length(ir_string) - ir_array[i] = parse(Int, ir_string[i]) - end - - for i in 1:res_size - for j in 1:in_size - random_number = DeviceAgnostic.rand(rng, T) - input_matrix[i, j] = random_number < 0.5 ? 
-weight : weight - end - end - - return T.(input_matrix) -end diff --git a/src/esn/esn_predict.jl b/src/esn/esn_predict.jl index ba6c80da..46e30e95 100644 --- a/src/esn/esn_predict.jl +++ b/src/esn/esn_predict.jl @@ -93,16 +93,16 @@ end function allocate_outpad(hesn::HybridESN, states_type, out) pad_length = length(out) + size(hesn.model.model_data[:, 1], 1) - out_tmp = Adapt.adapt(typeof(out), zeros(pad_length)) + out_tmp = adapt(typeof(out), zeros(pad_length)) return allocate_singlepadding(states_type, out_tmp) end function allocate_singlepadding(::AbstractPaddedStates, out) - Adapt.adapt(typeof(out), zeros(size(out, 1) + 1)) + adapt(typeof(out), zeros(size(out, 1) + 1)) end function allocate_singlepadding(::StandardStates, out) - Adapt.adapt(typeof(out), zeros(size(out, 1))) + adapt(typeof(out), zeros(size(out, 1))) end function allocate_singlepadding(::ExtendedStates, out) - Adapt.adapt(typeof(out), zeros(size(out, 1))) + adapt(typeof(out), zeros(size(out, 1))) end diff --git a/src/esn/esn_reservoir_drivers.jl b/src/esn/esn_reservoir_drivers.jl index 52c1a2c3..46e4cda8 100644 --- a/src/esn/esn_reservoir_drivers.jl +++ b/src/esn/esn_reservoir_drivers.jl @@ -1,31 +1,25 @@ abstract type AbstractReservoirDriver end """ - create_states( - reservoir_driver::AbstractReservoirDriver, - train_data, - washout, - reservoir_matrix, - input_matrix, - bias_vector - ) + create_states(reservoir_driver::AbstractReservoirDriver, train_data, washout, + reservoir_matrix, input_matrix, bias_vector) -Create and return the trained Echo State Network (ESN) states according to the specified reservoir driver. +Create and return the trained Echo State Network (ESN) states according to the +specified reservoir driver. # Arguments - - `reservoir_driver::AbstractReservoirDriver`: The reservoir driver that determines how the ESN states evolve over time. + - `reservoir_driver`: The reservoir driver that determines how the ESN states evolve + over time. - `train_data`: The training data used to train the ESN. - - `washout::Int`: The number of initial time steps to discard during training to allow the reservoir dynamics to wash out the initial conditions. - - `reservoir_matrix`: The reservoir matrix representing the dynamic, recurrent part of the ESN. - - `input_matrix`: The input matrix that defines the connections between input features and reservoir nodes. - - `bias_vector`: The bias vector to be added at each time step during the reservoir update. - -# Returns - - - A matrix of trained ESN states, where each column represents the state at a specific time step. - -This function is responsible for creating and returning the states of the ESN during training based on the provided training data and parameters. + - `washout`: The number of initial time steps to discard during training to allow the + reservoir dynamics to wash out the initial conditions. + - `reservoir_matrix`: The reservoir matrix representing the dynamic, recurrent part of + the ESN. + - `input_matrix`: The input matrix that defines the connections between input features + and reservoir nodes. + - `bias_vector`: The bias vector to be added at each time step during the reservoir + update. 
""" function create_states(reservoir_driver::AbstractReservoirDriver, train_data, @@ -36,9 +30,9 @@ function create_states(reservoir_driver::AbstractReservoirDriver, train_len = size(train_data, 2) - washout res_size = size(reservoir_matrix, 1) - states = Adapt.adapt(typeof(train_data), zeros(res_size, train_len)) + states = adapt(typeof(train_data), zeros(res_size, train_len)) tmp_array = allocate_tmp(reservoir_driver, typeof(train_data), res_size) - _state = Adapt.adapt(typeof(train_data), zeros(res_size, 1)) + _state = adapt(typeof(train_data), zeros(res_size, 1)) for i in 1:washout yv = @view train_data[:, i] @@ -65,9 +59,9 @@ function create_states(reservoir_driver::AbstractReservoirDriver, train_len = size(train_data, 2) - washout res_size = sum([size(reservoir_matrix[i], 1) for i in 1:length(reservoir_matrix)]) - states = Adapt.adapt(typeof(train_data), zeros(res_size, train_len)) + states = adapt(typeof(train_data), zeros(res_size, train_len)) tmp_array = allocate_tmp(reservoir_driver, typeof(train_data), res_size) - _state = Adapt.adapt(typeof(train_data), zeros(res_size)) + _state = adapt(typeof(train_data), zeros(res_size)) for i in 1:washout for j in 1:length(reservoir_matrix) @@ -99,7 +93,8 @@ end RNN(activation_function, leaky_coefficient) RNN(;activation_function=tanh, leaky_coefficient=1.0) -Returns a Recurrent Neural Network (RNN) initializer for the Echo State Network (ESN). +Returns a Recurrent Neural Network (RNN) initializer for +echo state networks (`ESN`). # Arguments @@ -108,13 +103,12 @@ Returns a Recurrent Neural Network (RNN) initializer for the Echo State Network # Keyword Arguments - - `activation_function`: The activation function used in the RNN. Defaults to `tanh`. - - `leaky_coefficient`: The leaky coefficient used in the RNN. Defaults to 1.0. - -This function creates an RNN object with the specified activation function and leaky coefficient, -which can be used as a reservoir driver in the ESN. + - `activation_function`: The activation function used in the RNN. + Defaults to `tanh_fast`. + - `leaky_coefficient`: The leaky coefficient used in the RNN. + Defaults to 1.0. """ -function RNN(; activation_function=NNlib.fast_act(tanh), leaky_coefficient=1.0) +function RNN(; activation_function=fast_act(tanh), leaky_coefficient=1.0) RNN(activation_function, leaky_coefficient) end @@ -148,7 +142,7 @@ function next_state!(out, rnn::RNN, x, y, W::Vector, W_in, b, tmp_array) end function allocate_tmp(::RNN, tmp_type, res_size) - return [Adapt.adapt(tmp_type, zeros(res_size, 1)) for i in 1:2] + return [adapt(tmp_type, zeros(res_size, 1)) for i in 1:2] end #multiple RNN driver @@ -163,25 +157,32 @@ end MRNN(;activation_function=[tanh, sigmoid], leaky_coefficient=1.0, scaling_factor=fill(leaky_coefficient, length(activation_function))) -Returns a Multiple RNN (MRNN) initializer for the Echo State Network (ESN), introduced in [^lun]. +Returns a Multiple RNN (MRNN) initializer for the Echo State Network (ESN), +introduced in [^Lun2015]. # Arguments - - `activation_function`: A vector of activation functions used in the MRNN. + - `activation_function`: A vector of activation functions used + in the MRNN. - `leaky_coefficient`: The leaky coefficient used in the MRNN. - - `scaling_factor`: A vector of scaling factors for combining activation functions. + - `scaling_factor`: A vector of scaling factors for combining activation + functions. # Keyword Arguments - - `activation_function`: A vector of activation functions used in the MRNN. Defaults to `[tanh, sigmoid]`. 
-  - `leaky_coefficient`: The leaky coefficient used in the MRNN. Defaults to 1.0.
-  - `scaling_factor`: A vector of scaling factors for combining activation functions. Defaults to an array of the same size as `activation_function` with all elements set to `leaky_coefficient`.
-
-This function creates an MRNN object with the specified activation functions, leaky coefficient, and scaling factors, which can be used as a reservoir driver in the ESN.
+  - `activation_function`: A vector of activation functions used in the MRNN.
+    Defaults to `[tanh, sigmoid]`.
+  - `leaky_coefficient`: The leaky coefficient used in the MRNN.
+    Defaults to 1.0.
+  - `scaling_factor`: A vector of scaling factors for combining activation functions.
+    Defaults to an array of the same size as `activation_function` with all
+    elements set to `leaky_coefficient`.
 
-# Reference:
+This function creates an MRNN object with the specified activation functions,
+leaky coefficient, and scaling factors, which can be used as a reservoir driver
+in the ESN.
 
-[^lun]: Lun, Shu-Xian, et al.
+[^Lun2015]: Lun, Shu-Xian, et al.
     "_A novel model of leaky integrator echo state network for time-series prediction._"
     Neurocomputing 159 (2015): 58-66.
 """
@@ -208,20 +209,11 @@ function next_state!(out, mrnn::MRNN, x, y, W, W_in, b, tmp_array)
     return out
 end
 
-#=
-function next_state!(out, mrnn::MRNN, x, y, W, W_in, b, tmp_array)
-    rnn_next_state = (1-mrnn.leaky_coefficient).*x
-    for i=1:length(mrnn.scaling_factor)
-        rnn_next_state += mrnn.scaling_factor[i]*mrnn.activation_function[i].((W*x).+(W_in*y).+b)
-    end
-    rnn_next_state
-end
-=#
-
 function allocate_tmp(::MRNN, tmp_type, res_size)
-    return [Adapt.adapt(tmp_type, zeros(res_size, 1)) for i in 1:2]
+    return [adapt(tmp_type, zeros(res_size, 1)) for i in 1:2]
 end
 
+abstract type AbstractGRUVariant end
 #GRU-based driver
 struct GRU{F, L, R, V, B} #not an abstractreservoirdriver
     activation_function::F
@@ -235,19 +227,15 @@ end
 """
     FullyGated()
 
-Returns a Fully Gated Recurrent Unit (FullyGated) initializer for the Echo State Network (ESN).
-
-This function creates a FullyGated object, which can be used as a reservoir driver in the ESN.
-The FullyGated variant is described in the literature reference [^cho].
+Returns a Fully Gated Recurrent Unit (FullyGated) initializer
+for the Echo State Network (ESN).
 
-# Returns
+This is the standard gated recurrent unit [^Cho2014], used as a driver for the
+echo state network (`ESN`).
 
-  - `FullyGated`: A FullyGated reservoir driver.
-
-# Reference
-
-[^cho]: Cho, Kyunghyun, et al.
-    "_Learning phrase representations using RNN encoder-decoder for statistical machine translation._"
+[^Cho2014]: Cho, Kyunghyun, et al.
+    "_Learning phrase representations using RNN encoder-decoder
+    for statistical machine translation._"
     arXiv preprint arXiv:1406.1078 (2014).
 """
 struct FullyGated <: AbstractGRUVariant end
@@ -255,9 +243,10 @@ struct FullyGated <: AbstractGRUVariant end
 
 """
     Minimal()
 
-Returns a minimal GRU ESN initializer as described in [^Zhou].
+Returns a minimal GRU ESN initializer as described in [^Zhou2016].
 
-[^Zhou]: Zhou, Guo-Bing, et al. "_Minimal gated unit for recurrent neural networks._"
+[^Zhou2016]: Zhou, Guo-Bing, et al. "_Minimal gated unit for recurrent
+    neural networks._"
    International Journal of Automation and Computing 13.3 (2016): 226-234.
""" struct Minimal <: AbstractGRUVariant end @@ -270,27 +259,28 @@ struct Minimal <: AbstractGRUVariant end bias = fill(DenseLayer(), 2), variant = FullyGated()) -Returns a Gated Recurrent Unit (GRU) reservoir driver for Echo State Networks (ESNs). This driver is based on the GRU architecture [^Cho], which is designed to capture temporal dependencies in data and is commonly used in various machine learning applications. +Returns a Gated Recurrent Unit (GRU) reservoir driver for Echo State Network (`ESN`). +This driver is based on the GRU architecture [^Cho2014]. # Arguments - - `activation_function`: An array of activation functions for the GRU layers. By default, it uses sigmoid activation functions for the update gate, reset gate, and tanh for the hidden state. - - `inner_layer`: An array of inner layers used in the GRU architecture. By default, it uses two dense layers. - - `reservoir`: An array of reservoir layers. By default, it uses two random sparse reservoirs. - - `bias`: An array of bias layers for the GRU. By default, it uses two dense layers. - - `variant`: The GRU variant to use. By default, it uses the "FullyGated" variant. - -# Returns - -A GRUParams object containing the parameters needed for the GRU-based reservoir driver. - -# References - -[^Cho]: Cho, Kyunghyun, et al. + - `activation_function`: An array of activation functions for the GRU layers. + By default, it uses sigmoid activation functions for the update gate, reset gate, + and tanh for the hidden state. + - `inner_layer`: An array of inner layers used in the GRU architecture. + By default, it uses two dense layers. + - `reservoir`: An array of reservoir layers. + By default, it uses two random sparse reservoirs. + - `bias`: An array of bias layers for the GRU. + By default, it uses two dense layers. + - `variant`: The GRU variant to use. + By default, it uses the "FullyGated" variant. + +[^Cho2014]: Cho, Kyunghyun, et al. "_Learning phrase representations using RNN encoder-decoder for statistical machine translation._" arXiv preprint arXiv:1406.1078 (2014). """ -function GRU(; activation_function=[NNlib.sigmoid, NNlib.sigmoid, tanh], +function GRU(; activation_function=[sigmoid, sigmoid, tanh], inner_layer=fill(scaled_rand, 2), reservoir=fill(rand_sparse, 2), bias=fill(scaled_rand, 2), @@ -354,7 +344,7 @@ function next_state!(out, gru::GRUParams, x, y, W, W_in, b, tmp_array) end function allocate_tmp(::GRUParams, tmp_type, res_size) - return [Adapt.adapt(tmp_type, zeros(res_size, 1)) for i in 1:9] + return [adapt(tmp_type, zeros(res_size, 1)) for i in 1:9] end #W=U, W_in=W in papers. x=h, and y=x. I know, it's confusing. ( on the left our notation) diff --git a/src/esn/esn_reservoirs.jl b/src/esn/esn_reservoirs.jl deleted file mode 100644 index 36276099..00000000 --- a/src/esn/esn_reservoirs.jl +++ /dev/null @@ -1,304 +0,0 @@ -""" - rand_sparse(rng::AbstractRNG, ::Type{T}, dims::Integer...; radius=1.0, sparsity=0.1) - -Create and return a random sparse reservoir matrix for use in Echo State Networks (ESNs). The matrix will be of size specified by `dims`, with specified `sparsity` and scaled spectral radius according to `radius`. - -# Arguments - - - `rng`: An instance of `AbstractRNG` for random number generation. - - `T`: The data type for the elements of the matrix. - - `dims`: Dimensions of the reservoir matrix. - - `radius`: The desired spectral radius of the reservoir. Defaults to 1.0. - - `sparsity`: The sparsity level of the reservoir matrix, controlling the fraction of zero elements. Defaults to 0.1. 
- -# Returns - -A matrix representing the random sparse reservoir. - -# References - -This type of reservoir initialization is commonly used in ESNs for capturing temporal dependencies in data. -""" -function rand_sparse(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - radius=T(1.0), - sparsity=T(0.1), - std=T(1.0)) where {T <: Number} - lcl_sparsity = T(1) - sparsity #consistency with current implementations - reservoir_matrix = sparse_init(rng, T, dims...; sparsity=lcl_sparsity, std=std) - rho_w = maximum(abs.(eigvals(reservoir_matrix))) - reservoir_matrix .*= radius / rho_w - if Inf in unique(reservoir_matrix) || -Inf in unique(reservoir_matrix) - error("Sparsity too low for size of the matrix. Increase res_size or increase sparsity") - end - return reservoir_matrix -end - -""" - delay_line(rng::AbstractRNG, ::Type{T}, dims::Integer...; weight=0.1) where {T <: Number} - -Create and return a delay line reservoir matrix for use in Echo State Networks (ESNs). A delay line reservoir is a deterministic structure where each unit is connected only to its immediate predecessor with a specified weight. This method is particularly useful for tasks that require specific temporal processing. - -# Arguments - - - `rng`: An instance of `AbstractRNG` for random number generation. This argument is not used in the current implementation but is included for consistency with other initialization functions. - - `T`: The data type for the elements of the matrix. - - `dims`: Dimensions of the reservoir matrix. Typically, this should be a tuple of two equal integers representing a square matrix. - - `weight`: The weight determines the absolute value of all connections in the reservoir. Defaults to 0.1. - -# Returns - -A delay line reservoir matrix with dimensions specified by `dims`. The matrix is initialized such that each element in the `i+1`th row and `i`th column is set to `weight`, and all other elements are zeros. - -# Example - -```julia -reservoir = delay_line(Float64, 100, 100; weight=0.2) -``` - -# References - -This type of reservoir initialization is described in: -Rodan, Ali, and Peter Tino. "Minimum complexity echo state network." IEEE Transactions on Neural Networks 22.1 (2010): 131-144. -""" -function delay_line(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - weight=T(0.1)) where {T <: Number} - reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) - @assert length(dims) == 2&&dims[1] == dims[2] "The dimensions must define a square matrix (e.g., (100, 100))" - - for i in 1:(dims[1] - 1) - reservoir_matrix[i + 1, i] = weight - end - - return reservoir_matrix -end - -""" - delay_line_backward(rng::AbstractRNG, ::Type{T}, dims::Integer...; - weight = T(0.1), fb_weight = T(0.2)) where {T <: Number} - -Create a delay line backward reservoir with the specified by `dims` and weights. Creates a matrix with backward connections -as described in [^Rodan2010]. The `weight` and `fb_weight` can be passed as either arguments or -keyword arguments, and they determine the absolute values of the connections in the reservoir. - -# Arguments - - - `rng::AbstractRNG`: Random number generator. - - `T::Type`: Type of the elements in the reservoir matrix. - - `dims::Integer...`: Dimensions of the reservoir matrix. - - `weight::T`: The weight determines the absolute value of forward connections in the reservoir, and is set to 0.1 by default. - - `fb_weight::T`: The `fb_weight` determines the absolute value of backward connections in the reservoir, and is set to 0.2 by default. 
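For a concrete picture of the structure these weights produce (a small illustrative call; the implementation follows below):

```julia
using ReservoirComputing
using Random

rng = Random.Xoshiro(0)
W = delay_line_backward(rng, Float64, 5, 5; weight=0.1, fb_weight=0.2)
# 0.1 sits on the sub-diagonal (forward chain, W[i + 1, i]) and 0.2 on the
# super-diagonal (feedback, W[i, i + 1]); all other entries are zero.
```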
- -# Returns - -Reservoir matrix with the dimensions specified by `dims` and weights. - -# References - -[^Rodan2010]: Rodan, Ali, and Peter Tino. "Minimum complexity echo state network." - IEEE transactions on neural networks 22.1 (2010): 131-144. -""" -function delay_line_backward(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - weight=T(0.1), - fb_weight=T(0.2)) where {T <: Number} - res_size = first(dims) - reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) - - for i in 1:(res_size - 1) - reservoir_matrix[i + 1, i] = weight - reservoir_matrix[i, i + 1] = fb_weight - end - - return reservoir_matrix -end - -""" - cycle_jumps(rng::AbstractRNG, ::Type{T}, dims::Integer...; - cycle_weight = T(0.1), jump_weight = T(0.1), jump_size = 3) where {T <: Number} - -Create a cycle jumps reservoir with the specified dimensions, cycle weight, jump weight, and jump size. - -# Arguments - - - `rng::AbstractRNG`: Random number generator. - - `T::Type`: Type of the elements in the reservoir matrix. - - `dims::Integer...`: Dimensions of the reservoir matrix. - - `cycle_weight::T = T(0.1)`: The weight of cycle connections. - - `jump_weight::T = T(0.1)`: The weight of jump connections. - - `jump_size::Int = 3`: The number of steps between jump connections. - -# Returns - -Reservoir matrix with the specified dimensions, cycle weight, jump weight, and jump size. - -# References - -[^Rodan2012]: Rodan, Ali, and Peter Tiňo. "Simple deterministically constructed cycle reservoirs - with regular jumps." Neural computation 24.7 (2012): 1822-1852. -""" -function cycle_jumps(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - cycle_weight::Number=T(0.1), - jump_weight::Number=T(0.1), - jump_size::Int=3) where {T <: Number} - res_size = first(dims) - reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) - - for i in 1:(res_size - 1) - reservoir_matrix[i + 1, i] = cycle_weight - end - - reservoir_matrix[1, res_size] = cycle_weight - - for i in 1:jump_size:(res_size - jump_size) - tmp = (i + jump_size) % res_size - if tmp == 0 - tmp = res_size - end - reservoir_matrix[i, tmp] = jump_weight - reservoir_matrix[tmp, i] = jump_weight - end - - return reservoir_matrix -end - -""" - simple_cycle(rng::AbstractRNG, ::Type{T}, dims::Integer...; - weight = T(0.1)) where {T <: Number} - -Create a simple cycle reservoir with the specified dimensions and weight. - -# Arguments - - - `rng::AbstractRNG`: Random number generator. - - `T::Type`: Type of the elements in the reservoir matrix. - - `dims::Integer...`: Dimensions of the reservoir matrix. - - `weight::T = T(0.1)`: Weight of the connections in the reservoir matrix. - -# Returns - -Reservoir matrix with the dimensions specified by `dims` and weights. - -# References - -[^Rodan2010]: Rodan, Ali, and Peter Tino. "Minimum complexity echo state network." - IEEE transactions on neural networks 22.1 (2010): 131-144. -""" -function simple_cycle(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - weight=T(0.1)) where {T <: Number} - reservoir_matrix = DeviceAgnostic.zeros(rng, T, dims...) - - for i in 1:(dims[1] - 1) - reservoir_matrix[i + 1, i] = weight - end - - reservoir_matrix[1, dims[1]] = weight - return reservoir_matrix -end - -""" - pseudo_svd(rng::AbstractRNG, ::Type{T}, dims::Integer...; - max_value, sparsity, sorted = true, reverse_sort = false) where {T <: Number} - - Returns an initializer to build a sparse reservoir matrix with the given `sparsity` by using a pseudo-SVD approach as described in [^yang]. 
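A small illustrative call (keyword values are examples only; the construction itself is shown further below):

```julia
using ReservoirComputing
using Random

rng = Random.Xoshiro(0)
W = pseudo_svd(rng, Float64, 100, 100; max_value=1.0, sparsity=0.2, sorted=true)
# a diagonal of singular-value-like entries is multiplied by random rotation
# matrices until the requested sparsity level is exceeded
```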
- -# Arguments - - - `rng::AbstractRNG`: Random number generator. - - `T::Type`: Type of the elements in the reservoir matrix. - - `dims::Integer...`: Dimensions of the reservoir matrix. - - `max_value`: The maximum absolute value of elements in the matrix. - - `sparsity`: The desired sparsity level of the reservoir matrix. - - `sorted`: A boolean indicating whether to sort the singular values before creating the diagonal matrix. By default, it is set to `true`. - - `reverse_sort`: A boolean indicating whether to reverse the sorted singular values. By default, it is set to `false`. - -# Returns - -Reservoir matrix with the specified dimensions, max value, and sparsity. - -# References - -This reservoir initialization method, based on a pseudo-SVD approach, is inspired by the work in [^yang], which focuses on designing polynomial echo state networks for time series prediction. - -[^yang]: Yang, Cuili, et al. "_Design of polynomial echo state networks for time series prediction._" Neurocomputing 290 (2018): 148-160. -""" -function pseudo_svd(rng::AbstractRNG, - ::Type{T}, - dims::Integer...; - max_value::Number=T(1.0), - sparsity::Number=0.1, - sorted::Bool=true, - reverse_sort::Bool=false) where {T <: Number} - reservoir_matrix = create_diag(rng, T, dims[1], - max_value; - sorted=sorted, - reverse_sort=reverse_sort) - tmp_sparsity = get_sparsity(reservoir_matrix, dims[1]) - - while tmp_sparsity <= sparsity - reservoir_matrix *= create_qmatrix(rng, T, dims[1], - rand_range(rng, T, dims[1]), - rand_range(rng, T, dims[1]), - DeviceAgnostic.rand(rng, T) * T(2) - T(1)) - tmp_sparsity = get_sparsity(reservoir_matrix, dims[1]) - end - - return reservoir_matrix -end - -#hacky workaround for the moment -function rand_range(rng, T, n::Int) - return Int(1 + floor(DeviceAgnostic.rand(rng, T) * n)) -end - -function create_diag(rng::AbstractRNG, ::Type{T}, dim::Number, max_value::Number; - sorted::Bool=true, reverse_sort::Bool=false) where {T <: Number} - diagonal_matrix = DeviceAgnostic.zeros(rng, T, dim, dim) - if sorted == true - if reverse_sort == true - diagonal_values = sort( - DeviceAgnostic.rand(rng, T, dim) .* max_value; rev=true) - diagonal_values[1] = max_value - else - diagonal_values = sort(DeviceAgnostic.rand(rng, T, dim) .* max_value) - diagonal_values[end] = max_value - end - else - diagonal_values = DeviceAgnostic.rand(rng, T, dim) .* max_value - end - - for i in 1:dim - diagonal_matrix[i, i] = diagonal_values[i] - end - - return diagonal_matrix -end - -function create_qmatrix(rng::AbstractRNG, ::Type{T}, dim::Number, - coord_i::Number, - coord_j::Number, - theta::Number) where {T <: Number} - qmatrix = DeviceAgnostic.zeros(rng, T, dim, dim) - - for i in 1:dim - qmatrix[i, i] = 1.0 - end - - qmatrix[coord_i, coord_i] = cos(theta) - qmatrix[coord_j, coord_j] = cos(theta) - qmatrix[coord_i, coord_j] = -sin(theta) - qmatrix[coord_j, coord_i] = sin(theta) - return qmatrix -end - -function get_sparsity(M, dim) - return size(M[M .!= 0], 1) / (dim * dim - size(M[M .!= 0], 1)) #nonzero/zero elements -end diff --git a/src/esn/hybridesn.jl b/src/esn/hybridesn.jl index 13c8b642..129b4f8f 100644 --- a/src/esn/hybridesn.jl +++ b/src/esn/hybridesn.jl @@ -24,8 +24,8 @@ end """ KnowledgeModel(prior_model, u0, tspan, datasize) -Constructs a `Hybrid` variation of Echo State Networks (ESNs) integrating a knowledge-based model -(`prior_model`) with ESNs for advanced training and prediction in chaotic systems. 
+Constructs a `Hybrid` variation of Echo State Networks (ESNs) [^Pathak2018] +integrating a knowledge-based model (`prior_model`) with ESNs. # Parameters @@ -34,15 +34,7 @@ Constructs a `Hybrid` variation of Echo State Networks (ESNs) integrating a know - `tspan`: Time span as a tuple, indicating the duration for model operation. - `datasize`: The size of the data to be processed. -# Returns - - - A `Hybrid` struct instance representing the combined ESN and knowledge-based model. - -This method is effective for chaotic processes as highlighted in [^Pathak]. - -Reference: - -[^Pathak]: Jaideep Pathak et al. +[^Pathak2018]: Jaideep Pathak et al. "Hybrid Forecasting of Chaotic Processes: Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018). """ @@ -59,11 +51,7 @@ end HybridESN(model, train_data, in_size, res_size; kwargs...) Construct a Hybrid Echo State Network (ESN) model that integrates -traditional Echo State Networks with a predefined knowledge model for -enhanced performance on chaotic systems or complex datasets. This -constructor allows for the creation of a customized ESN architecture by -specifying the reservoir size, input size, and various other parameters that -influence the network's behavior and learning capacity. +traditional Echo State Networks with a predefined knowledge model [^Pathak2018]. # Parameters @@ -79,41 +67,29 @@ influence the network's behavior and learning capacity. # Optional Keyword Arguments - - `input_layer`: A function to initialize the input matrix. Default is `scaled_rand`. - - `reservoir`: A function to initialize the reservoir matrix. Default is `rand_sparse`. - - `bias`: A function to initialize the bias vector. Default is `zeros64`. - - `reservoir_driver`: The driving system for the reservoir. Default is an RNN model. + - `input_layer`: A function to initialize the input matrix. + Default is `scaled_rand`. + - `reservoir`: A function to initialize the reservoir matrix. + Default is `rand_sparse`. + - `bias`: A function to initialize the bias vector. + Default is `zeros32`. + - `reservoir_driver`: The driving system for the reservoir. + Default is an RNN model. - `nla_type`: The type of non-linear activation used in the reservoir. Default is `NLADefault()`. - - `states_type`: Defines the type of states used in the ESN (e.g., standard states). - Default is `StandardStates()`. - - `washout`: The number of initial timesteps to be discarded in the ESN's training phase. - Default is 0. - - `rng`: Random number generator used for initializing weights. Default is the package's - default random number generator. - - `T`: The data type for the matrices (e.g., `Float32`). Influences computational - efficiency and precision. - - `matrix_type`: The type of matrix used for storing the training data. Default is - inferred from `train_data`. - -# Returns - - - A `HybridESN` instance configured according to the provided parameters and - suitable for further training and prediction tasks. - -# Example - -```julia -# Define a KnowledgeModel -km = KnowledgeModel(prior_model_function, u0, (0, 100), 1000) - -# Create a HybridESN -hesn = HybridESN(km, train_data, 10, 100; washout=100) - -# Train and predict -train(hesn, target_data) -prediction = hesn(prediction_object, output_layer) -``` + - `states_type`: Defines the type of states used in the + ESN. Default is `StandardStates()`. + - `washout`: The number of initial timesteps to be + discarded in the ESN's training phase. Default is 0. 
+ - `rng`: Random number generator used for initializing weights. + Default is `Utils.default_rng()`. + - `T`: The data type for the matrices (e.g., `Float32`). + - `matrix_type`: The type of matrix used for storing the training data. + Default is inferred from `train_data`. + +[^Pathak2018]: Jaideep Pathak et al. + "Hybrid Forecasting of Chaotic Processes: + Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018). """ function HybridESN(model, train_data, @@ -133,7 +109,7 @@ function HybridESN(model, if states_type isa AbstractPaddedStates in_size = size(train_data, 1) + 1 - train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))), + train_data = vcat(adapt(matrix_type, ones(1, size(train_data, 2))), train_data) else in_size = size(train_data, 1) diff --git a/src/predict.jl b/src/predict.jl index 46b89076..ca0f782c 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -1,3 +1,71 @@ +abstract type AbstractOutputLayer end +abstract type AbstractPrediction end + +#general output layer struct +struct OutputLayer{T, I, S, L} <: AbstractOutputLayer + training_method::T + output_matrix::I + out_size::S + last_value::L +end + +function Base.show(io::IO, ol::OutputLayer) + print(io, "OutputLayer successfully trained with output size: ", ol.out_size) +end + +#prediction types +""" + Generative(prediction_len) + +A prediction strategy that enables models to generate autonomous multi-step +forecasts by recursively feeding their own outputs back as inputs for +subsequent prediction steps. + +# Parameters + + - `prediction_len`: The number of future steps to predict. + +# Description + +The `Generative` prediction method allows a model to perform multi-step +forecasting by using its own previous predictions as inputs for future predictions. + +At each step, the model takes the current input, generates a prediction, +and then incorporates that prediction into the input for the next step. +This recursive process continues until the specified +number of prediction steps (`prediction_len`) is reached. +""" +struct Generative{T} <: AbstractPrediction + prediction_len::T +end + +struct Predictive{I, T} <: AbstractPrediction + prediction_data::I + prediction_len::T +end + +""" + Predictive(prediction_data) + +A prediction strategy for supervised learning tasks, +where a model predicts labels based on a provided set +of input features (`prediction_data`). + +# Parameters + + - `prediction_data`: The input data used for prediction, `feature` x `sample` + +# Description + +The `Predictive` prediction method uses the provided input data +(`prediction_data`) to produce corresponding labels or outputs based +on the learned relationships in the model. +""" +function Predictive(prediction_data) + prediction_len = size(prediction_data, 2) + Predictive(prediction_data, prediction_len) +end + function obtain_prediction(rc::AbstractReservoirComputer, prediction::Generative, x, @@ -48,7 +116,7 @@ end #single matrix for other training methods function output_storing(training_method, out_size, prediction_len, storing_type) - return Adapt.adapt(storing_type, zeros(out_size, prediction_len)) + return adapt(storing_type, zeros(out_size, prediction_len)) end #general storing -> single matrix
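Putting the prediction types above together with the hybrid model from `hybridesn.jl`, a usage sketch might look like the following. The placeholder names `prior_model_function`, `u0`, `train_data`, `target_data`, and `test_features` follow the example that this diff removes and are assumed to be defined by the user; the sketch also assumes `train` returns the fitted `OutputLayer`, as the removed example implies.

```julia
using ReservoirComputing

km = KnowledgeModel(prior_model_function, u0, (0, 100), 1000)
hesn = HybridESN(km, train_data, 10, 100; washout=100)

output_layer = train(hesn, target_data)

# autonomous forecast: 500 steps, each prediction fed back as the next input
forecast = hesn(Generative(500), output_layer)

# one-shot prediction on fixed features (a feature x sample matrix)
labels = hesn(Predictive(test_features), output_layer)
```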