From 997b1788a4635c66721f052bc0dfde1bb5c9759b Mon Sep 17 00:00:00 2001 From: Greg Date: Mon, 19 Jul 2021 09:14:14 -0700 Subject: [PATCH 1/2] Edits and tutorial fixes. --- docs/src/examples/augmented_neural_ode.md | 2 +- docs/src/examples/mnist_conv_neural_ode.md | 2 +- docs/src/examples/mnist_neural_ode.md | 18 +++++++++--------- docs/src/examples/neural_ode_flux.md | 4 ++-- docs/src/examples/neural_ode_galacticoptim.md | 7 +++++-- docs/src/examples/neural_ode_sciml.md | 14 +++++++------- 6 files changed, 25 insertions(+), 22 deletions(-) diff --git a/docs/src/examples/augmented_neural_ode.md b/docs/src/examples/augmented_neural_ode.md index d530444213..beda2c1d45 100644 --- a/docs/src/examples/augmented_neural_ode.md +++ b/docs/src/examples/augmented_neural_ode.md @@ -26,7 +26,7 @@ function concentric_sphere(dim, inner_radius_range, outer_radius_range, end data = cat(data..., dims=2) labels = cat(labels..., dims=2) - return DataLoader(data |> gpu, labels |> gpu; batchsize=batch_size, shuffle=true, + return DataLoader((data |> gpu, labels |> gpu); batchsize=batch_size, shuffle=true, partial=false) end diff --git a/docs/src/examples/mnist_conv_neural_ode.md b/docs/src/examples/mnist_conv_neural_ode.md index f7cf9bc1fa..1710c537ed 100644 --- a/docs/src/examples/mnist_conv_neural_ode.md +++ b/docs/src/examples/mnist_conv_neural_ode.md @@ -344,7 +344,7 @@ This callback function is used to print both the training and testing accuracy a ```julia cb() = begin global iter += 1 - # Monitor that the weights do infact update + # Monitor that the weights update # Every 10 training iterations show accuracy if iter % 10 == 1 train_accuracy = accuracy(model, train_dataloader) * 100 diff --git a/docs/src/examples/mnist_neural_ode.md b/docs/src/examples/mnist_neural_ode.md index e47c0683d9..df98ef912f 100644 --- a/docs/src/examples/mnist_neural_ode.md +++ b/docs/src/examples/mnist_neural_ode.md @@ -39,7 +39,7 @@ const bs = 128 const train_split = 0.9 train_dataloader, test_dataloader = loadmnist(bs, train_split) -down = Chain(flatten, Dense(784, 20, tanh)) |> gpu +down = Chain(Flux.flatten, Dense(784, 20, tanh)) |> gpu nn = Chain(Dense(20, 10, tanh), Dense(10, 10, tanh), @@ -58,7 +58,7 @@ function DiffEqArray_to_Array(x) return reshape(xarr, size(xarr)[1:2]) end -# Build our over-all model topology +# Build our overall model topology model = Chain(down, nn_ode, DiffEqArray_to_Array, @@ -196,7 +196,7 @@ to the next. Four different sets of layers are used here: ```julia -down = Chain(flatten, Dense(784, 20, tanh)) |> gpu +down = Chain(Flux.flatten, Dense(784, 20, tanh)) |> gpu nn = Chain(Dense(20, 10, tanh), Dense(10, 10, tanh), @@ -227,8 +227,8 @@ fc = Chain(Dense(20, 10)) |> gpu ### Array Conversion -When using `NeuralODE`, we can use the following function as a cheap conversion of `DiffEqArray` -from the ODE solver into a Matrix that can be used in the following layer: +When using `NeuralODE`, this function converts the ODESolution's `DiffEqArray` to +a Matrix (CuArray), and reduces the matrix from 3 to 2 dimensions for use in the next layer. ```julia function DiffEqArray_to_Array(x) @@ -238,7 +238,7 @@ end ``` For CPU: If this function does not automatically fallback to CPU when no GPU is present, we can -change `gpu(x)` with `Array(x)`. +change `gpu(x)` to `Array(x)`. ### Build Topology @@ -246,7 +246,7 @@ change `gpu(x)` with `Array(x)`. 
Next we connect all layers together in a single chain:
 
 ```julia
-# Build our over-all model topology
+# Build our overall model topology
 model = Chain(down,
               nn_ode,
               DiffEqArray_to_Array,
@@ -343,7 +343,7 @@ This callback function is used to print both the training and testing accuracy a
 ```julia
 cb() = begin
     global iter += 1
-    # Monitor that the weights do infact update
+    # Monitor that the weights update
     # Every 10 training iterations show accuracy
     if iter % 10 == 1
         train_accuracy = accuracy(model, train_dataloader) * 100
@@ -363,7 +363,7 @@ for Neural ODE is given by `nn_ode.p`:
 
 ```julia
 # Train the NN-ODE and monitor the loss and weights.
-Flux.train!(loss, params( down, nn_ode.p, fc), zip( x_train, y_train ), opt, cb = cb)
+Flux.train!(loss, Flux.params( down, nn_ode.p, fc), zip( x_train, y_train ), opt, cb = cb)
 ```
 
 ### Expected Output
diff --git a/docs/src/examples/neural_ode_flux.md b/docs/src/examples/neural_ode_flux.md
index 4fdb3ad504..57592d94b7 100644
--- a/docs/src/examples/neural_ode_flux.md
+++ b/docs/src/examples/neural_ode_flux.md
@@ -24,7 +24,7 @@ dudt2 = Chain(x -> x.^3,
              Dense(2,50,tanh),
              Dense(50,2))
 p,re = Flux.destructure(dudt2) # use this p as the initial condition!
-dudt(u,p,t) = re(p)(u) # need to restrcture for backprop!
+dudt(u,p,t) = re(p)(u) # need to restructure for backprop!
 prob = ODEProblem(dudt,u0,tspan)
 
 function predict_n_ode()
@@ -39,7 +39,7 @@ end
 loss_n_ode() # n_ode.p stores the initial parameters of the neural ODE
 
-cb = function (;doplot=false) #callback function to observe training
+cb = function (;doplot=false) # callback function to observe training
   pred = predict_n_ode()
   display(sum(abs2,ode_data .- pred))
   # plot current prediction against data
diff --git a/docs/src/examples/neural_ode_galacticoptim.md b/docs/src/examples/neural_ode_galacticoptim.md
index a0b9345e8f..89e119bda9 100644
--- a/docs/src/examples/neural_ode_galacticoptim.md
+++ b/docs/src/examples/neural_ode_galacticoptim.md
@@ -5,7 +5,7 @@ a lot of the choices, using heuristics to determine a potentially efficient meth
 However, in some cases you may want more control over the optimization process.
 The underlying optimization package behind `sciml_train` is
 [GalacticOptim.jl](https://github.com/SciML/GalacticOptim.jl).
-In this tutorial we will show how to more deeply interact with the optimzation
+In this tutorial we will show how to more deeply interact with the optimization
 library to tweak its processes.
 
 We can use a neural ODE as our example. A neural ODE is an ODE where a neural
@@ -169,11 +169,14 @@ set up custom optimization problems. For more information on the usage of
 [GalacticOptim.jl](https://github.com/SciML/GalacticOptim.jl), please consult
 [this](https://galacticoptim.sciml.ai/stable/) documentation.
 
+The `x` and `p` variables in the optimization function are different from the
+`x` and `p` above. The optimization function runs over the space of parameters of
+the original problem, so `x_optimization` corresponds to `p_original`.
 ```julia
 # Train using the ADAM optimizer
 adtype = GalacticOptim.AutoZygote()
 
-optf = GalacticOptim.OptimizationFunction((x, p) -> loss_neuralode(x), adtype)
+optf = GalacticOptim.OptimizationFunction((x_optimization, p_optimization) -> loss_neuralode(x_optimization), adtype)
 optfunc = GalacticOptim.instantiate_function(optf, prob_neuralode.p, adtype, nothing)
 optprob = GalacticOptim.OptimizationProblem(optfunc, prob_neuralode.p)
 
diff --git a/docs/src/examples/neural_ode_sciml.md b/docs/src/examples/neural_ode_sciml.md
index 33d10bfaec..dbffce5280 100644
--- a/docs/src/examples/neural_ode_sciml.md
+++ b/docs/src/examples/neural_ode_sciml.md
@@ -64,7 +64,7 @@ result_neuralode = DiffEqFlux.sciml_train(loss_neuralode, prob_neuralode.p,
 
 ## Explanation
 
-Let's get a time series array from the Lotka-Volterra equation as data:
+Let's generate a time series array from a cubic ODE as data:
 
 ```julia
 using DiffEqFlux, DifferentialEquations, Plots
@@ -104,9 +104,9 @@ dudt2 = Chain(x -> x.^3,
 
 In our model we used the `x -> x.^3` assumption in the model. By incorporating structure
 into our equations, we can reduce the required size and training time
-for the neural network, but a good guess needs to be known!
+for the neural network, but we need a good guess!
 
-From here we build a loss function around it. The `NeuralODE` has an optional
+From here, we build a loss function around our `NeuralODE`. `NeuralODE` has an optional
 second argument for new parameters which we will use to iteratively change the
 neural network in our training loop. We will use the L2 loss of the network's
 output against the time series data:
@@ -139,18 +139,18 @@ callback = function (p, l, pred; doplot = false)
 end
 ```
 
-We then train the neural network to learn the ODE. Using `sciml_train`, heuristics
-are chosen that does this fast and simply:
+We then train the neural network to learn the ODE. `sciml_train` chooses heuristics
+that train quickly and simply:
 
 ```julia
 result_neuralode = DiffEqFlux.sciml_train(loss_neuralode, prob_neuralode.p,
                                           cb = callback)
 ```
 
-## Usage without the layer
+## Usage Without the Layer Function
 
 Note that you can equivalently define the NeuralODE by hand instead of using
-the layer function. With `FastChain` this would look like:
+the `NeuralODE` layer function. With `FastChain` this would look like:
 
 ```julia
 dudt!(u, p, t) = dudt2(u, p)

From a5d077cbf1ec0867a4bc2d849148e8c8f869c201 Mon Sep 17 00:00:00 2001
From: Greg
Date: Mon, 19 Jul 2021 09:24:42 -0700
Subject: [PATCH 2/2] Add detailed comments to sciml_train tutorial.
---
 docs/src/examples/neural_ode_sciml.md | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/docs/src/examples/neural_ode_sciml.md b/docs/src/examples/neural_ode_sciml.md
index dbffce5280..882d77d2bf 100644
--- a/docs/src/examples/neural_ode_sciml.md
+++ b/docs/src/examples/neural_ode_sciml.md
@@ -17,34 +17,40 @@ follow a full explanation of the definition and training process:
 
 ```julia
 using DiffEqFlux, DifferentialEquations, Plots, GalacticOptim
 
-u0 = Float32[2.0; 0.0]
-datasize = 30
-tspan = (0.0f0, 1.5f0)
-tsteps = range(tspan[1], tspan[2], length = datasize)
+u0 = Float32[2.0; 0.0] # Initial condition
+datasize = 30 # Number of data points
+tspan = (0.0f0, 1.5f0) # Time range
+tsteps = range(tspan[1], tspan[2], length = datasize) # Split time range into equal steps for each data point
 
+# Function that will generate the data we are trying to fit
 function trueODEfunc(du, u, p, t)
     true_A = [-0.1 2.0; -2.0 -0.1]
-    du .= ((u.^3)'true_A)'
+    du .= ((u.^3)'true_A)' # Need transposes to make the matrix multiplication work
 end
 
+# Define the problem with the function above
 prob_trueode = ODEProblem(trueODEfunc, u0, tspan)
+# Solve and take just the solution array
 ode_data = Array(solve(prob_trueode, Tsit5(), saveat = tsteps))
 
-dudt2 = FastChain((x, p) -> x.^3,
-                  FastDense(2, 50, tanh),
+# Make a neural net and wrap it in a NeuralODE layer
+dudt2 = FastChain((x, p) -> x.^3, # Guess a cubic function
+                  FastDense(2, 50, tanh), # Multilayer perceptron for the part we don't know
                   FastDense(50, 2))
 prob_neuralode = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps)
 
+# Array of predictions from the NeuralODE with parameters p, starting from initial condition u0
 function predict_neuralode(p)
   Array(prob_neuralode(u0, p))
 end
 
 function loss_neuralode(p)
     pred = predict_neuralode(p)
-    loss = sum(abs2, ode_data .- pred)
+    loss = sum(abs2, ode_data .- pred) # Sum of squared errors
     return loss, pred
 end
 
+# Callback function to observe training
 callback = function (p, l, pred; doplot = true)
   display(l)
   # plot current prediction against data
@@ -56,6 +62,7 @@ callback = function (p, l, pred; doplot = true)
   return false
 end
 
+# The initial parameters come from prob_neuralode.p
 result_neuralode = DiffEqFlux.sciml_train(loss_neuralode, prob_neuralode.p,
                                           cb = callback)
 ```
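
As a follow-up to the tutorial code touched by this patch, here is a minimal sketch of inspecting the trained model once `sciml_train` returns. It is an illustration rather than part of the patch: it assumes the returned result exposes the trained parameters through a `minimizer` field (the convention used for `sciml_train` results in the DiffEqFlux docs of this period) and reuses `predict_neuralode`, `ode_data`, and `tsteps` from the tutorial:

```julia
# Sketch: visualize the trained fit, assuming the tutorial definitions above are loaded
# and that `result_neuralode.minimizer` holds the trained parameter vector.
trained_pred = predict_neuralode(result_neuralode.minimizer)

# Compare the first state variable of the data against the trained prediction
plt = scatter(tsteps, ode_data[1, :], label = "data")
scatter!(plt, tsteps, trained_pred[1, :], label = "prediction")
display(plt)
```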