diff --git a/docs/src/api/training.md b/docs/src/api/training.md index f0106121..160b7420 100644 --- a/docs/src/api/training.md +++ b/docs/src/api/training.md @@ -2,6 +2,10 @@ ## Linear Models +```@docs + StandardRidge +``` + ## Gaussian Regression Currently, v0.10, is unavailable. diff --git a/ext/RCLIBSVMExt.jl b/ext/RCLIBSVMExt.jl index a4da5267..4cb19242 100644 --- a/ext/RCLIBSVMExt.jl +++ b/ext/RCLIBSVMExt.jl @@ -2,7 +2,8 @@ module RCLIBSVMExt using ReservoirComputing using LIBSVM -function ReservoirComputing.train(svr::LIBSVM.AbstractSVR, states, target) +function ReservoirComputing.train(svr::LIBSVM.AbstractSVR, + states::AbstractArray, target::AbstractArray) out_size = size(target, 1) output_matrix = [] @@ -17,8 +18,8 @@ function ReservoirComputing.train(svr::LIBSVM.AbstractSVR, states, target) return OutputLayer(svr, output_matrix, out_size, target[:, end]) end -function ReservoirComputing.get_prediction( - training_method::LIBSVM.AbstractSVR, output_layer, x) +function ReservoirComputing.get_prediction(training_method::LIBSVM.AbstractSVR, + output_layer, x::AbstractArray) out = zeros(output_layer.out_size) for i in 1:(output_layer.out_size) diff --git a/ext/RCMLJLinearModelsExt.jl b/ext/RCMLJLinearModelsExt.jl index 432443f8..6a671271 100644 --- a/ext/RCMLJLinearModelsExt.jl +++ b/ext/RCMLJLinearModelsExt.jl @@ -3,8 +3,7 @@ using ReservoirComputing using MLJLinearModels function ReservoirComputing.train(regressor::MLJLinearModels.GeneralizedLinearRegression, - states::AbstractArray{T}, - target::AbstractArray{T}; + states::AbstractArray{T}, target::AbstractArray{T}; kwargs...) 
where {T <: Number} out_size = size(target, 1) output_layer = similar(target, size(target, 1), size(states, 1)) diff --git a/src/esn/deepesn.jl b/src/esn/deepesn.jl index d9018128..bdd44c08 100644 --- a/src/esn/deepesn.jl +++ b/src/esn/deepesn.jl @@ -11,6 +11,8 @@ struct DeepESN{I, S, N, T, O, M, B, ST, W, IS} <: AbstractEchoStateNetwork states::IS end +const AbstractDriver = Union{AbstractReservoirDriver, GRU} + """ DeepESN(train_data, in_size, res_size; kwargs...) @@ -18,9 +20,7 @@ Constructs a Deep Echo State Network (ESN) model for processing sequential data through a layered architecture of reservoirs. This constructor allows for the creation of a deep learning model that benefits from the dynamic memory and temporal processing capabilities of ESNs, -enhanced by the depth provided by multiple reservoir layers. It's particularly -suited for complex sequential tasks where depth can help capture hierarchical -temporal features. +enhanced by the depth provided by multiple reservoir layers. 
# Parameters @@ -62,19 +62,12 @@ train_data = rand(Float32, 3, 100) deepESN = DeepESN(train_data, 3, 100; depth=3, washout=100) ``` """ -function DeepESN(train_data, - in_size::Int, - res_size::Int; - depth::Int=2, - input_layer=fill(scaled_rand, depth), - bias=fill(zeros32, depth), - reservoir=fill(rand_sparse, depth), - reservoir_driver=RNN(), - nla_type=NLADefault(), - states_type=StandardStates(), - washout::Int=0, - rng=Utils.default_rng(), - matrix_type=typeof(train_data)) +function DeepESN(train_data::AbstractArray, in_size::Int, res_size::Int; depth::Int=2, + input_layer=fill(scaled_rand, depth), bias=fill(zeros32, depth), + reservoir=fill(rand_sparse, depth), reservoir_driver::AbstractDriver=RNN(), + nla_type::NonLinearAlgorithm=NLADefault(), + states_type::AbstractStates=StandardStates(), washout::Int=0, + rng::AbstractRNG=Utils.default_rng(), matrix_type=typeof(train_data)) if states_type isa AbstractPaddedStates in_size = size(train_data, 1) + 1 train_data = vcat(adapt(matrix_type, ones(1, size(train_data, 2))), diff --git a/src/esn/esn.jl b/src/esn/esn.jl index 07824cc4..d541c258 100644 --- a/src/esn/esn.jl +++ b/src/esn/esn.jl @@ -12,6 +12,8 @@ struct ESN{I, S, N, T, O, M, B, ST, W, IS} <: AbstractEchoStateNetwork states::IS end +const AbstractDriver = Union{AbstractReservoirDriver, GRU} + """ ESN(train_data; kwargs...) 
-> ESN @@ -52,17 +54,12 @@ julia> esn = ESN(train_data, 10, 300; washout=10) ESN(10 => 300) ``` """ -function ESN(train_data, - in_size::Int, - res_size::Int; - input_layer=scaled_rand, - reservoir=rand_sparse, - bias=zeros32, - reservoir_driver=RNN(), - nla_type=NLADefault(), - states_type=StandardStates(), - washout=0, - rng=Utils.default_rng(), +function ESN(train_data::AbstractArray, in_size::Int, res_size::Int; + input_layer=scaled_rand, reservoir=rand_sparse, bias=zeros32, + reservoir_driver::AbstractDriver=RNN(), + nla_type::NonLinearAlgorithm=NLADefault(), + states_type::AbstractStates=StandardStates(), + washout::Int=0, rng::AbstractRNG=Utils.default_rng(), matrix_type=typeof(train_data)) if states_type isa AbstractPaddedStates in_size = size(train_data, 1) + 1 @@ -85,11 +82,9 @@ function ESN(train_data, end function (esn::AbstractEchoStateNetwork)(prediction::AbstractPrediction, - output_layer::AbstractOutputLayer; - last_state=esn.states[:, [end]], + output_layer::AbstractOutputLayer; last_state=esn.states[:, [end]], kwargs...) pred_len = prediction.prediction_len - return obtain_esn_prediction(esn, prediction, last_state, output_layer; kwargs...) end @@ -133,12 +128,9 @@ julia> output_layer = train(esn, rand(Float32, 3, 90)) OutputLayer successfully trained with output size: 3 ``` """ -function train(esn::AbstractEchoStateNetwork, - target_data, - training_method=StandardRidge(); - kwargs...) +function train(esn::AbstractEchoStateNetwork, target_data::AbstractArray, + training_method=StandardRidge(); kwargs...) states_new = esn.states_type(esn.nla_type, esn.states, esn.train_data[:, 1:end]) - return train(training_method, states_new, target_data; kwargs...) end diff --git a/src/esn/esn_reservoir_drivers.jl b/src/esn/esn_reservoir_drivers.jl index 46e4cda8..a5e207af 100644 --- a/src/esn/esn_reservoir_drivers.jl +++ b/src/esn/esn_reservoir_drivers.jl @@ -22,14 +22,10 @@ specified reservoir driver. update. 
""" function create_states(reservoir_driver::AbstractReservoirDriver, - train_data, - washout, - reservoir_matrix, - input_matrix, - bias_vector) + train_data::AbstractArray, washout::Int, reservoir_matrix::AbstractMatrix, + input_matrix::AbstractMatrix, bias_vector::AbstractArray) train_len = size(train_data, 2) - washout res_size = size(reservoir_matrix, 1) - states = adapt(typeof(train_data), zeros(res_size, train_len)) tmp_array = allocate_tmp(reservoir_driver, typeof(train_data), res_size) _state = adapt(typeof(train_data), zeros(res_size, 1)) @@ -51,14 +47,10 @@ function create_states(reservoir_driver::AbstractReservoirDriver, end function create_states(reservoir_driver::AbstractReservoirDriver, - train_data, - washout, - reservoir_matrix::Vector, - input_matrix, - bias_vector) + train_data::AbstractArray, washout::Int, reservoir_matrix::Vector, + input_matrix::AbstractArray, bias_vector::AbstractArray) train_len = size(train_data, 2) - washout res_size = sum([size(reservoir_matrix[i], 1) for i in 1:length(reservoir_matrix)]) - states = adapt(typeof(train_data), zeros(res_size, train_len)) tmp_array = allocate_tmp(reservoir_driver, typeof(train_data), res_size) _state = adapt(typeof(train_data), zeros(res_size)) diff --git a/src/esn/hybridesn.jl b/src/esn/hybridesn.jl index 129b4f8f..366799d1 100644 --- a/src/esn/hybridesn.jl +++ b/src/esn/hybridesn.jl @@ -12,6 +12,8 @@ struct HybridESN{I, S, V, N, T, O, M, B, ST, W, IS} <: AbstractEchoStateNetwork states::IS end +const AbstractDriver = Union{AbstractReservoirDriver, GRU} + struct KnowledgeModel{T, K, O, I, S, D} prior_model::T u0::K @@ -91,19 +93,12 @@ traditional Echo State Networks with a predefined knowledge model [^Pathak2018]. "Hybrid Forecasting of Chaotic Processes: Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018). 
""" -function HybridESN(model, - train_data, - in_size::Int, - res_size::Int; - input_layer=scaled_rand, - reservoir=rand_sparse, - bias=zeros32, - reservoir_driver=RNN(), - nla_type=NLADefault(), - states_type=StandardStates(), - washout=0, - rng=Utils.default_rng(), - T=Float32, +function HybridESN(model::KnowledgeModel, train_data::AbstractArray, + in_size::Int, res_size::Int; input_layer=scaled_rand, reservoir=rand_sparse, + bias=zeros32, reservoir_driver::AbstractDriver=RNN(), + nla_type::NonLinearAlgorithm=NLADefault(), + states_type::AbstractStates=StandardStates(), washout::Int=0, + rng::AbstractRNG=Utils.default_rng(), T=Float32, matrix_type=typeof(train_data)) train_data = vcat(train_data, model.model_data[:, 1:(end - 1)]) @@ -130,8 +125,7 @@ function HybridESN(model, end function (hesn::HybridESN)(prediction::AbstractPrediction, - output_layer::AbstractOutputLayer; - last_state=hesn.states[:, [end]], + output_layer::AbstractOutputLayer; last_state::AbstractArray=hesn.states[:, [end]], kwargs...) km = hesn.model pred_len = prediction.prediction_len @@ -148,10 +142,8 @@ function (hesn::HybridESN)(prediction::AbstractPrediction, kwargs...) end -function train(hesn::HybridESN, - target_data, - training_method=StandardRidge(); - kwargs...) +function train(hesn::HybridESN, target_data::AbstractArray, + training_method=StandardRidge(); kwargs...) states = vcat(hesn.states, hesn.model.model_data[:, 2:end]) states_new = hesn.states_type(hesn.nla_type, states, hesn.train_data[:, 1:end]) diff --git a/src/predict.jl b/src/predict.jl index ca0f782c..18a30bfb 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -61,16 +61,13 @@ The `Predictive` prediction method uses the provided input data (`prediction_data`) to produce corresponding labels or outputs based on the learned relationships in the model. 
""" -function Predictive(prediction_data) +function Predictive(prediction_data::AbstractArray) prediction_len = size(prediction_data, 2) Predictive(prediction_data, prediction_len) end -function obtain_prediction(rc::AbstractReservoirComputer, - prediction::Generative, - x, - output_layer, - args...; +function obtain_prediction(rc::AbstractReservoirComputer, prediction::Generative, + x, output_layer::AbstractOutputLayer, args...; initial_conditions=output_layer.last_value) #x = last_state prediction_len = prediction.prediction_len @@ -88,12 +85,8 @@ function obtain_prediction(rc::AbstractReservoirComputer, return output end -function obtain_prediction(rc::AbstractReservoirComputer, - prediction::Predictive, - x, - output_layer, - args...; - kwargs...) +function obtain_prediction(rc::AbstractReservoirComputer, prediction::Predictive, + x, output_layer::AbstractOutputLayer, args...; kwargs...) prediction_len = prediction.prediction_len train_method = output_layer.training_method out_size = output_layer.out_size @@ -110,7 +103,7 @@ function obtain_prediction(rc::AbstractReservoirComputer, end #linear models -function get_prediction(training_method, output_layer, x) +function get_prediction(training_method, output_layer::AbstractOutputLayer, x) return output_layer.output_matrix * x end diff --git a/src/train/linear_regression.jl b/src/train/linear_regression.jl index 1a271cc3..c66ff104 100644 --- a/src/train/linear_regression.jl +++ b/src/train/linear_regression.jl @@ -1,3 +1,22 @@ +@doc raw""" + + StandardRidge([Type], [reg]) + +Returns a training method for `train` based on ridge regression. +The equations for ridge regression are as follows: + +```math +\mathbf{w} = (\mathbf{X}^\top \mathbf{X} + +\lambda \mathbf{I})^{-1} \mathbf{X}^\top \mathbf{y} +``` + +# Arguments + - `Type`: type of the regularization argument. Default is inferred internally, + there's usually no need to tweak this + - `reg`: regularization coefficient. Default is set to 0.0 (linear regression). 
+ + +""" struct StandardRidge reg::Number end @@ -10,12 +29,21 @@ function StandardRidge() return StandardRidge(0.0) end -function train(sr::StandardRidge, - states, - target_data) +function train(sr::StandardRidge, states::AbstractArray, target_data::AbstractArray) #A = states * states' + sr.reg * I #b = states * target_data #output_layer = (A \ b)' + + if size(states, 2) != size(target_data, 2) + throw(DimensionMismatch("\n" * + "\n" * + " - Number of columns in `states`: $(size(states, 2))\n" * + " - Number of columns in `target_data`: $(size(target_data, 2))\n" * + "The dimensions of `states` and `target_data` must align for training." * + "\n" + )) + end + output_layer = Matrix(((states * states' + sr.reg * I) \ (states * target_data'))') return OutputLayer(sr, output_layer, size(target_data, 1), target_data[:, end])