diff --git a/book/numerical_methods/curve_fitting.nim b/book/numerical_methods/curve_fitting.nim
index 086f328..f0521fa 100644
--- a/book/numerical_methods/curve_fitting.nim
+++ b/book/numerical_methods/curve_fitting.nim
@@ -2,8 +2,10 @@ import nimib, nimibook
 import mpfit, ggplotnim
 
 nbInit(theme = useNimibook)
+nb.useLatex
 
-nbText: """
+# Vindaar's part:
+#[ nbText: """
 # Curve fitting using [mpfit](https://github.com/Vindaar/nim-mpfit)
 
 This section will cover a curve fitting example. It assumes you are familiar with the
@@ -29,6 +31,299 @@ yes.
 """
 
 nbText: """
+""" ]#
+
+nbText: hlMd"""
+## Curve fitting using `numericalnim`
+[Curve fitting](https://en.wikipedia.org/wiki/Curve_fitting) is the task of finding the "best" parameters for a given function,
+such that it minimizes the error on the given data. The simplest example is
+finding the slope and intercept of a line $y = ax + b$ such that it minimizes the squared errors
+between the data points and the line.
+[numericalnim](https://github.com/SciNim/numericalnim) implements the [Levenberg-Marquardt algorithm](https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm) (`levmarq`)
+for solving non-linear least squares problems. One such problem is curve fitting, which aims to find the parameters that (locally) minimize
+the error between the data and the fitted curve.
+
+The required imports are:
+"""
+
+nbCode:
+  import numericalnim, ggplotnim, arraymancer, std / [math, sequtils]
+
+import std / random
+randomize(1337)
+
+nbText: hlMd"""
+In some cases you know the actual form of the function you want to fit,
+but in other cases you may have to guess and try multiple different ones.
+In this tutorial we will assume we know the form, but it works the same regardless.
+The test curve we will sample points from is
+$$f(t) = \alpha + \sin(\beta t + \gamma) e^{-\delta t}$$
+with $\alpha = 0.5, \beta = 6, \gamma = 0.1, \delta = 1$. This will be a decaying sine wave with an offset.
+We will add a bit of noise to it as well:
+"""
+
+nbCode:
+  proc f(alpha, beta, gamma, delta, t: float): float =
+    alpha + sin(beta * t + gamma) * exp(-delta*t)
+
+  let
+    alpha = 0.5
+    beta = 6.0
+    gamma = 0.1
+    delta = 1.0
+  let t = arraymancer.linspace(0.0, 3.0, 20)
+  let yClean = t.map_inline:
+    f(alpha, beta, gamma, delta, x)
+  let noise = 0.025
+  let y = yClean + randomNormalTensor(t.shape[0], 0.0, noise)
+
+let tDense = arraymancer.linspace(0.0, 3.0, 200)
+let yDense = tDense.map_inline:
+  f(alpha, beta, gamma, delta, x)
+
+block:
+  let df = toDf(t, y, tDense, yDense)
+  df.ggplot(aes("t", "y")) +
+    geom_point() +
+    geom_line(aes("tDense", "yDense")) +
+    ggsave("images/levmarq_rawdata.png")
+
+nbImage("images/levmarq_rawdata.png")
+
+nbText: hlMd"""
+Here we have the original function along with the sampled points with noise.
+Now we have to create a proc of the form that `levmarq` expects. Specifically, it
+wants all the parameters in a single `Tensor` instead of as separate arguments:
+"""
+
+nbCode:
+  proc fitFunc(params: Tensor[float], t: float): float =
+    let alpha = params[0]
+    let beta = params[1]
+    let gamma = params[2]
+    let delta = params[3]
+    result = f(alpha, beta, gamma, delta, t)
+
+nbText: hlMd"""
+As we can see, all the parameters that we want to fit are passed in as
+a single 1D Tensor that we unpack for clarity here. The only other thing
+that is needed is an initial guess of the parameters.
+We will set them all to 1 here:
+"""
+
+nbCode:
+  let initialGuess = ones[float](4)
+
+nbText: "Now we are ready to do the actual fitting:"
+
+nbCodeInBlock:
+  let solution = levmarq(fitFunc, initialGuess, t, y)
+  echo solution
+
+nbText: hlMd"""
+As we can see, the found parameters are very close to the actual ones. But maybe we can do better:
+`levmarq` accepts an `options` parameter, which is the same as the one described in [Optimization](./optimization.html)
+with the addition of the `lambda0` parameter. We can reduce the `tol` and see if we get an even better fit:
+"""
+
+nbCode:
+  let options = levmarqOptions(tol=1e-10)
+  let solution = levmarq(fitFunc, initialGuess, t, y, options=options)
+  echo solution
+
+nbText: hlMd"""
+As we can see, there isn't really any difference. So we can conclude that the
+found solution has in fact converged.
+
+Here's a plot comparing the fitted and original functions:
+"""
+
+block:
+  let ySol = tDense.map_inline:
+    fitFunc(solution, x)
+
+  var df = toDf({"t": tDense, "original": yDense, "fitted": ySol, "tPoint": t, "yPoint": y})
+  df = df.gather(@["original", "fitted"], key="Class", value = "y")
+  df.ggplot(aes("t", "y", color="Class")) +
+    geom_line() +
+    geom_point(aes("tPoint", "yPoint")) +
+    ggsave("images/levmarq_comparision.png")
+
+nbImage("images/levmarq_comparision.png")
+
+nbText: hlMd"""
+As we can see, the fitted curve is quite close to the original one.
+
+## Errors & Uncertainties
+We might also want to quantify exactly how good a fit the computed parameters give.
+One measure commonly used is [$\chi^2$](https://en.m.wikipedia.org/wiki/Pearson%27s_chi-squared_test):
+$$\chi^2 = \sum_i \frac{(y_i - \hat{y}(x_i))^2}{\sigma_i^2}$$
+where $y_i$ and $x_i$ are the measurements, $\hat{y}$ is the fitted curve and $\sigma_i$
+is the standard deviation (uncertainty/error) of each of the measurements. We will use the
+noise size we used when generating the samples here, but in general you will have to
+figure out yourself what the errors are in your situation. It may for example be the resolution
+of your measurements, or you may have to approximate it using the data itself.
+
+We now create the error vector and sample the fitted curve:
+"""
+
+nbCode:
+  let yError = ones_like(y) * noise
+  let yCurve = t.map_inline:
+    fitFunc(solution, x)
+
+nbText: """
+Now we can calculate the $\chi^2$ using numericalnim's `chi2` proc:
+"""
+
+nbCode:
+  let chi = chi2(y, yCurve, yError)
+  echo "χ² = ", chi
+
+
+#[ nbCodeInBlock:
+  var chi2 = 0.0
+  for i in 0 ..< y.len:
+    chi2 += ((y[i] - yCurve[i]) / yError[i]) ^ 2
+  echo "χ² = ", chi2 ]#
+
+nbText: hlMd"""
+Great! Now we have a measure of how good the fit is, but what if we add more points?
+Then we will get a better fit, but we will also get more points to sum over.
+And what if we choose another curve to fit with more parameters?
+Then we may be able to get a better fit, but we risk overfitting.
+[Reduced $\chi^2$](https://en.m.wikipedia.org/wiki/Reduced_chi-squared_statistic)
+is a measure which adjusts $\chi^2$ to take these factors into account.
+The formula is:
+$$\chi^2_{\nu} = \frac{\chi^2}{n_{\text{obs}} - m_{\text{params}}}$$
+where $n_{\text{obs}}$ is the number of observations and $m_{\text{params}}$
+is the number of parameters in the curve. The difference between them is denoted
+the degrees of freedom (dof).
+Put simply, this is the mean $\chi^2$ score, adjusted to penalize overly complex curves.
+
+Let's calculate it!
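+In our case $n_{\text{obs}} = 20$ (we sampled 20 points) and $m_{\text{params}} = 4$, so the denominator is $20 - 4 = 16$ degrees of freedom; this is what `y.len - solution.len` computes in the code below.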
+""" + +nbCode: + let reducedChi = chi / (y.len - solution.len).float + echo "Reduced χ² = ", reducedChi + +nbText: hlMd""" +As a rule of thumb, values around 1 are desirable. If it is much larger +than 1, it indicates a bad fit. And if it is much smaller than 1 it means +that the fit is much better than the uncertainties suggested. This could +either mean that it has overfitted or that the errors were overestimated. + +### Parameter uncertainties +To find the uncertainties of the fitted parameters, we have to calculate +the covariance matrix: +$$\Sigma = \sigma^2 H^{-1}$$ +where $\sigma^2$ is the standard deviation of the residuals and $H$ is +the Hessian of the objective function (we used $\chi^2$). +`numericalnim` can compute the covariance matrix for us using `paramUncertainties`: +""" + +nbCode: + let cov = paramUncertainties(solution, fitFunc, t, y, yError, returnFullCov=true) + echo cov + +nbText: """ +That is the full covariance matrix, but we are only interested in the diagonal elements. +By default `returnFullCov` is false and then we get a 1D Tensor with the diagonal instead: +""" + +nbCode: + let variances = paramUncertainties(solution, fitFunc, t, y, yError) + echo variances + +nbText: """ +It's important to note that these values are the **variances** of the parameters. +So if we want the standard deviations we will have to take the square root: +""" + +nbCode: + let paramUncertainty = sqrt(variances) + echo "Uncertainties: ", paramUncertainty + +nbText: """ +All in all, these are the values and uncertainties we got for each of the parameters: +""" + +nbCode: + echo "α = ", solution[0], " ± ", paramUncertainty[0] + echo "β = ", solution[1], " ± ", paramUncertainty[1] + echo "γ = ", solution[2], " ± ", paramUncertainty[2] + echo "δ = ", solution[3], " ± ", paramUncertainty[3] + + +#[ nbCode: + proc objectiveFunc(params: Tensor[float]): float = + let yCurve = t.map_inline: + fitFunc(params, x) + result = chi2(y, yCurve, yError) #sum(((y - yCurve) /. yError) ^. 
2) + +nbText: hlMd""" +Now we approximate $\sigma^2$ by the reduced $\chi^2$ at the fitted parameters: +""" + +nbCode: + let sigma2 = objectiveFunc(solution) / (y.len - solution.len).float + echo "σ² = ", sigma2 + +nbText: """ +The Hessian at the solution can be calculated numerically using `tensorHessian`: +""" + +nbCode: + let H = tensorHessian(objectiveFunc, solution) + echo "H = ", H + +nbText: "Now we calculate the covariance matrix as described above:" + +nbCode: + proc eye(n: int): Tensor[float] = + result = zeros[float](n, n) + for i in 0 ..< n: + result[i,i] = 1 + + proc inv(t: Tensor[float]): Tensor[float] = + result = solve(t, eye(t.shape[0])) + + let cov = sigma2 * H.inv() + echo "Σ = ", cov + +nbText: """ +The diagonal elements of the covariance matrix is the uncertainty (variance) in our parameters, +so we take the square root of them to get the standard deviation: +""" + +nbCode: + proc getDiag(t: Tensor[float]): Tensor[float] = + let n = t.shape[0] + result = newTensor[float](n) + for i in 0 ..< n: + result[i] = t[i,i] + + let paramUncertainty = sqrt(cov.getDiag()) + echo "Uncertainties: ", paramUncertainty + +nbCodeInBlock: + let uncertainties = sqrt(paramUncertainties(solution, fitFunc, y, t, yError)) + echo "Uncertainties: ", uncertainties + +nbText: """ +All in all, these are the values and uncertainties we got for each of the parameters: +""" + +nbCode: + echo "α = ", solution[0], " ± ", paramUncertainty[0] + echo "β = ", solution[1], " ± ", paramUncertainty[1] + echo "γ = ", solution[2], " ± ", paramUncertainty[2] + echo "δ = ", solution[3], " ± ", paramUncertainty[3] ]# + +nbText: hlMd""" +## Further reading +- [numericalnim's documentation on optimization](https://scinim.github.io/numericalnim/numericalnim/optimize.html) """ nbSave diff --git a/book/numerical_methods/integration1d.nim b/book/numerical_methods/integration1d.nim index 0508537..2d15649 100644 --- a/book/numerical_methods/integration1d.nim +++ b/book/numerical_methods/integration1d.nim @@ -345,4 +345,9 @@ only `cumtrapz` and `cumsimpson` are available and that you pass in `y` and `x` nbImage("images/discreteHumpsComparaision.png") +nbText: hlMd""" +## Further reading +- [numericalnim's documentation on integration](https://scinim.github.io/numericalnim/numericalnim/integrate.html) +""" + nbSave() diff --git a/book/numerical_methods/interpolation.nim b/book/numerical_methods/interpolation.nim new file mode 100644 index 0000000..4dc06de --- /dev/null +++ b/book/numerical_methods/interpolation.nim @@ -0,0 +1,269 @@ +import nimib except Value +import nimibook +import std / [strformat] + + +nbInit(theme = useNimibook) +nb.useLatex + +nbText: hlMd""" +# Interpolate discrete data in Nim +Very seldom when dealing with real data do you have an exact function you can evaluate at any point you would like. +Often you have data in a finite number of discrete points instead. Two common ways to solve this problem is to either +interpolate the data or if you know the form it should take you can do curve fitting. In this tutorial we will cover the +interpolation approach in 1D, 2D and 3D for some cases. + +We will be using [numericalnim](https://github.com/SciNim/numericalnim) for this and the imports we will need are: +""" + +nbCode: + import numericalnim, arraymancer, ggplotnim, std/[math, sequtils] + +block Part1: + nbText: hlMd""" +## 1D Interpolation +This is the simplest case of interpolation and `numericalnim` can handle both evenly spaced and variable spacing in 1D. 
+We will use the function $$f(x) = \sin(3x) (x-2)^2$$ for our benchmarks: +""" + + block: + let t = linspace(0.0, 4.2, 100) + let y = t.mapIt(sin(3*it) * (it - 2)^2) + let df = toDf(t, y) + ggplot(df, aes("t", "y")) + + geom_line() + + ggtitle("f(x)") + + ggsave("images/1dinterpolation_f.png") + + nbImage("images/1dinterpolation_f.png") + + nbText: hlMd""" +Let's get interpolating! We first define the function and sample from it: +""" + + nbCode: + proc f(x: float): float = sin(3*x) * (x - 2)^2 + + let x = linspace(0.0, 4.2, 10) + let y = x.map(f) + + nbText: hlMd""" +Now we are ready to create the interpolator! For 1D there are these options available: +- [Linear](https://en.wikipedia.org/wiki/Linear_interpolation): `newLinear1D` +- [Cubic spline](https://en.wikipedia.org/wiki/Spline_interpolation): `newCubicSpline` (only supports floats) +- [Hermite spline](https://en.wikipedia.org/wiki/Cubic_Hermite_spline): `newHermiteSpline` + +They all have the same API accepting `seq`s of x- and y-values. +The Hermite spline can optionally supply the derivatives at each point as well. +We will be using the Hermite spline without the derivatives, they will be approximated +using finite differences for us. +""" + + nbCode: + let interp = newHermiteSpline(x, y) + + nbText: hlMd""" +Now that we have constructed the interpolator, we can evaluate it using `eval`. +It accepts either a single input or a `seq` of inputs: +""" + + nbCode: + echo interp.eval(1.0) + echo interp.eval(@[1.0, 2.0]) + + nbText: hlMd""" +We can now evaluate it on a denser set of points and compare it to the original function: +""" + + block: + let t = linspace(0.0, 4.2, 100) + let yOriginal = t.map(f) + let yInterp = interp.eval(t) + var df = toDf(x, y, t, yOriginal, yInterp) + df = df.gather(@["yOriginal", "yInterp"], key = "Class", value = "Value") + ggplot(df, aes("t", "Value", color="Class")) + + geom_line() + + geom_point(aes("x", "y")) + + ggsave("images/compare_interp.png") + + nbImage("images/compare_interp.png") + + nbText: hlMd""" +As we can see, the interpolant does a decent job of approximating the function. +It is worse where the function is changing a lot and closer to the original +in the middle where there is less happening. +""" + +block Part2: + nbText: hlMd""" +## 2D interpolation +For 2D interpolation, `numericalnim` offers 3 methods for gridded data: +- [Nearest neighbour](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation): `newNearestNeighbour2D` +- [Bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation): `newBilinearSpline` +- [Bicubic](https://en.wikipedia.org/wiki/Bicubic_interpolation): `newBicubicSpline` + +and 2 methods for scattered data: + +- [Barycentric](https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/barycentric-coordinates.html): `newBarycentric2D` +- [Radial basis function](https://en.wikipedia.org/wiki/Radial_basis_function_interpolation): `newRbf` +The RBF method isn't unique to 2D and works for any dimension, see the next section for more on that. +For this tutorial, we will focus on the Bicubic spline but NearestNeighbour and Bilinear spline have the same API. +Let's first choose a suitable function to interpolate: +$$f(x, y) = e^{-(x^2 + y^2)}$$ +This will be a Gaussian centered around `(0, 0)`, here's a heatmap showing the function: +""" + + block: + let coords = meshgrid(arraymancer.linspace(-1.0, 1.0, 100), arraymancer.linspace(-1.0, 1.0, 100)) + let z = exp(-1.0 * sum(coords *. 
coords, axis=1)).squeeze() + let df = toDf({"x": coords[_,0].squeeze, "y": coords[_,1].squeeze, "z": z}) + ggplot(df, aes("x", "y", fill = "z")) + + geom_raster() + + xlim(-1, 1) + + ylim(-1, 1) + + ggsave("images/2d_interp_func.png") + + nbImage("images/2d_interp_func.png") + + nbText: hlMd""" +Now we are ready to sample our function. The inputs expected are the limits in $x$ and $y$ along +with the function values in grid as a Tensor. So if we have a 3x3 grid of data points, the Tensor +should have shape 3x3 as well. Because the data is known to be on a grid, you don't have to give +all the points on it, it's enough to only provide the limits as two tuples. +""" + + nbCode: + proc f(x, y: float): float = + exp(-(x*x + y*y)) + + let xlim = (-1.0, 1.0) + let ylim = (-1.0, 1.0) + + let xs = linspace(xlim[0], xlim[1], 5) + let ys = linspace(ylim[0], ylim[1], 5) + var z = zeros[float](xs.len, ys.len) + for i, x in xs: + for j, y in ys: + z[i, j] = f(x, y) + + nbText: "Now we have sampled our function, let's construct the interpolator:" + + nbCode: + let interp = newBicubicSpline(z, xlim, ylim) + + nbText: "The interpolator can now be evaluated using `eval`:" + + nbCode: + echo interp.eval(0.0, 0.0) + + nbText: hlMd""" +We can now plot the function to see how much it resembles the original function (the dots are the sampled points): +""" + + block: + let coords = meshgrid(arraymancer.linspace(-1.0, 0.999, 100), arraymancer.linspace(-1.0, 0.999, 100)) + let sampleCoords = meshgrid(arraymancer.linspace(-1.0, 1.0, 5), arraymancer.linspace(-1.0, 1.0, 5)) + var z = zeros[float](coords.shape[0]) + var exact = zeros[float](coords.shape[0]) + for i in 0 ..< coords.shape[0]: + z[i] = interp.eval(coords[i, 0], coords[i, 1]) + exact[i] = f(coords[i, 0], coords[i, 1]) + let df = toDf({"x": coords[_,0].squeeze, "y": coords[_,1].squeeze, "z": z, "error": abs(exact - z), + "xSample": sampleCoords[_,0].squeeze, "ySample": sampleCoords[_,1].squeeze}) + ggplot(df, aes("x", "y", fill = "z")) + + geom_raster() + + geom_point(aes("xSample", "ySample"), color = "#F92672") + + xlim(-1, 1) + + ylim(-1, 1) + + ggtitle("Interpolator result") + + ggsave("images/2d_interp_eval.png") + + ggplot(df, aes("x", "y", fill="error")) + + geom_raster() + + geom_point(aes("xSample", "ySample"), color = "#F92672") + + xlim(-1, 1) + + ylim(-1, 1) + + ggtitle("Error") + + ggsave("images/2d_interp_error.png") + + nbImage("images/2d_interp_eval.png") + nbImage("images/2d_interp_error.png") + + nbText: hlMd""" +It looks pretty similar to the original function so I'd say it did a good job. +We have also plotted the error in the second image. We can see that the +interpolant is the most accurate close to the sampled points and the least +accurate between them. +""" + +block Part3: + nbText: hlMd""" +## ND interpolation +With the addition of Radial basis function (RBF) interpolation, `numericalnim` now offers an +interpolation method that works for **scattered** data of **arbitrary dimensions**. +The conceptual explanation for how RBF interpolation works is that a Gaussian is placed at +each of the data points. These are then each scaled such that the sum of all the Gaussians +pass through all the data points exactly. + +Further, the implemented method employs localization using Partition of Unity. This is a +method that exploits the fact that a Gaussian decays very quickly. Hence we shouldn't +have to take into account points far away from the point we are interested in. 
So internally +a grid structure is created such that points are only affected by their neighbors. This both +speeds up the code and also makes it more numerically stable. + +The format of the inputs that is expected is the positions as a `Tensor` of shape `(nPoints, nDims)` +and the function values (can be multi-valued) of shape `(nPoints, nValues)`. In the general case +the points aren't gridded but if you want to create points on a grid you can do it with the +`meshgrid` function. It takes in a `varargs` of `Tensor[float]`, one `Tensor` for each dimension containing the +grid values in that dimension. An example is: +""" + + nbCode: + let x = meshgrid(arraymancer.linspace(-1.0, 1.0, 5), arraymancer.linspace(-1.0, 1.0, 5), arraymancer.linspace(-1.0, 1.0, 5)) + echo x[0..10, _] + + nbText: hlMd""" +which will create a 3D grid with 5 points between -1 and 1 in each of the dimensions. + +Now let's define a function we are going to interpolate: +$$f(x, y, z) = \sin(x) \sin(y) \sin(z)$$ +Here's the code for implementing it: +""" + + nbCode: + proc f(x: Tensor[float]): Tensor[float] = + product(sin(x), axis=1) + + let y = f(x) + + nbText: hlMd""" +Now we can construct the interpolator as such: +""" + + nbCode: + let interp = newRbf(x, y) + + nbText: "Evaluation is done by calling `eval` with the evaluation point(s):" + + nbCode: + let xEval = [[0.5, 0.5, 0.5], [0.6, 0.5, 0.3], [0.75, -0.23, 0.46]].toTensor + echo interp.eval(xEval).squeeze + echo f(xEval).squeeze + + nbText: hlMd""" +As we can see by comparing the exact solution with the interpolant, they are pretty close to each other. + +## Conclusion +As we have seen, you can do all sorts of interpolations in Nim with just a few lines of code. +Have a nice day! + +## Further reading +- [numericalnim's documentation on interpolation](https://scinim.github.io/numericalnim/numericalnim/interpolate.html) +- [numericalnim's documentation on RBFs](https://scinim.github.io/numericalnim/numericalnim/rbf.html) +- [Curve fitting](https://scinim.github.io/getting-started/numerical_methods/curve_fitting.html) is a method you can use +if you know the parametric form of the function of the data you want to interpolate. + +""" + + +nbSave \ No newline at end of file diff --git a/book/numerical_methods/ode.nim b/book/numerical_methods/ode.nim new file mode 100644 index 0000000..88ddeb7 --- /dev/null +++ b/book/numerical_methods/ode.nim @@ -0,0 +1,264 @@ +import nimib except Value +import nimibook +import std / [strformat, strutils] + + +nbInit(theme = useNimibook) +nb.useLatex + +let fixedODESubset = @["heun2", "rk4"] +let adaptiveODESubset = @["rk21", "tsit54"] + +nbText: hlMd""" +# Solve ordinary differential equations in Nim + +Ordinary differential equations (ODEs) describe so many aspects of nature, but many of them do not have +simple solutions you can solve using pen and paper. That is where numerical solutions come in. They allow +us to solve these equations approximately. We will use [numericalnim](https://github.com/SciNim/numericalnim) +for this. The required imports are: +""" + +nbCode: + import ggplotnim, numericalnim, benchy, std / [math, sequtils] + +nbText: hlMd""" +We will use an ODE with a known solution to be able to compare our numerical solution with +the analytical one. 
The ODE we will solve is: +$$y' = y (1 - y)$$ +$$y(0) = 0.1$$ +which has the solution of a sigmoid: +$$y(t) = \frac{1}{1 + 9 e^{-t}}$$ + +This equation can for example describe a system which grows exponentially at the beginning, but due to +limited amounts of resources the growth stops and it reaches an equilibrium. The solution looks like this: +""" + +block: + let t = linspace(0.0, 10.0, 100) + let y = t.mapIt(1 / (1 + 9*exp(-it))) + let df = toDf(t, y) + ggplot(df, aes("t", "y")) + + geom_line() + + ggtitle("ODE solution") + + ggsave("images/sigmoid_solution.png") + +nbImage("images/sigmoid_solution.png") + +nbText: hlMd""" +## Let's code +Let us create the function for evaluating the derivative $y'(t, y)$: +""" + +nbCode: + proc exact(t: float): float = + 1 / (1 + 9*exp(-t)) + + proc dy(t: float, y: float, ctx: NumContext[float, float]): float = + y * (1 - y) + +nbText: hlMd""" +`numericalnim` expects a function of the form `proc(t: float, y: T, ctx: NumContext[T, float]): T`, +where `T` is the type of the function value (can be `Tensor[float]` if you have a vector-valued function), +and `ctx` is a context variable that allows you to pass parameters or save information between function calls. +Now we need to define a few parameters before we can do the actual solving: +""" + +nbCode: + let t0 = 0.0 + let tEnd = 10.0 + let y0 = 0.1 # initial condition + let tspan = linspace(t0, tEnd, 20) + let odeOptions = newODEoptions(tStart = t0) + +nbText: hlMd""" +Here we define the starting time (`t0`), the value of the function at that time (`y0`), the time points we want to get +the solution at (`tspan`) and the options we want the solver to use (`odeOptions`). There are more options you can pass, +for example the timestep `dt` used for fixed step-size methods and `dtMax` and `dtMin` used by adaptive methods. +The only thing left is to choose which method we want to use. 
The fixed step-size methods are: +""" + +let references = { + "heun2": "https://en.wikipedia.org/wiki/Heun%27s_method", + "ralston2": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Ralston's_method", + "kutta3": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Kutta's_third-order_method", + "heun3": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Heun's_third-order_method", + "ralston3": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Ralston's_third-order_method", + "ssprk3": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Third-order_Strong_Stability_Preserving_Runge-Kutta_(SSPRK3)", + "ralston4": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Ralston's_fourth-order_method", + "kutta4": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#3/8-rule_fourth-order_method", + "rk4": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Classic_fourth-order_method", + "rk21": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Heun%E2%80%93Euler", + "bs32": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Bogacki%E2%80%93Shampine", + "dopri54": "https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Dormand%E2%80%93Prince", + "tsit54": "http://users.uoa.gr/~tsitourasc/RK54_new_v2.pdf", + "vern65": "https://www.sfu.ca/~jverner/" +}.toTable + +proc referenceList(list: openArray[string]): string = + result = "" + for m in list: + result &= "- " + result &= ( + if m in references: + &"[{m}]({references[m]})" + else: + m + ) + result &= '\n' + + +nbText(referenceList(fixedODE)) + +nbText: "And the adaptive methods are:" + +nbText(referenceList(adaptiveODE)) + +nbText: hlMd""" +That is a lot to choose from, but its hard to go wrong with any of the adaptive methods `dopri54, tsit54 & vern65`. So let's use `tsit54`! +""" + +nbCode: + let (t, y) = solveOde(dy, y0, tspan, odeOptions, integrator="tsit54") + let error = abs(y[^1] - exact(t[^1])) + echo "Error: ", error + +nbText: hlMd""" +We call `solveOde` with our parameters and it returns one `seq` with our time points and one with our solution at those points. +We can check the error at the last point and we see that it is around `3e-16`. Let's plot this solution: +""" + +nbCodeInBlock: + let yExact = t.mapIt(exact(it)) + var df = toDf(t, y, yExact) + df = df.gather(["y", "yExact"], key="Class", value="y") + ggplot(df, aes("t", "y", color="Class")) + + geom_line() + + ggtitle("Tsit54 Solution") + + ggsave("images/tsit54_solution.png") + +nbImage("images/tsit54_solution.png") + +nbText: hlMd""" +As we can see, the graphs are very similar so it seems to work :D. + +Now you might be curios how well a few of the other methods performed, and here you have it: +""" + +for m in concat(fixedODESubset, adaptiveODESubset): + let (t, y) = solveOde(dy, y0, tspan, odeOptions, integrator=m) + let error = abs(y[^1] - exact(t[^1])) + nbText: &"- {m}: {error:e}" + +nbText: hlMd""" +We can see that the higher-order methods has a lower error compared to the lower-order ones. Tweaking the +`odeOptions` would probably get them on-par with the others. There is one parameter we haven't talked about though, +the execution time. 
Let's look at that and see if it bring any further insights: +""" + +nbCodeInBlock: + for m in concat(fixedODESubset, adaptiveODESubset): + benchy.timeIt m: + keep solveOde(dy, y0, tspan, odeOptions, integrator=m) + +nb.blk.code = "" + +nbText: hlMd""" +As we can see, the adaptive methods are orders of magnitude faster while achieving roughly the same errors. +This is because they take fewer and longer steps when the function is flatter and only decrease the +step-size when the function is changing more rapidly. + +## Vector-valued functions +Now let us look at another example which is a simplified version of what you might get from discretizing a PDE, multiple function values. +The equation in question is +$$y'' = -y$$ +$$y(0) = 0$$ +$$y'(0) = 1$$ +This has the simple solution +$$y(t) = \sin(t)$$ +but it is not on the form `y' = ...` that `numericalnim` wants, so we have to rewrite it. +We introduce a new variable $z = y'$ and then we can rewrite the equation like: +$$y'' = (y')' = z' = -y$$ +This gives us a system of ODEs: +$$y' = z$$ +$$z' = -y$$ +$$y(0) = 0$$ +$$z(0) = y'(0) = 1$$ + +This can be written in vector-form to simplify it: +$$\mathbf{v} = [y, z]^T$$ +$$\mathbf{v}' = [z, -y]^T$$ +$$\mathbf{v}(0) = [0, 1]^T$$ + +Now we can write a function which takes as input a $\mathbf{v}$ and returns the derivative of them. +This function can be represented as a matrix multiplication, $\mathbf{v}' = A\mathbf{v}$, with a matrix $A$: +$$A = \begin{bmatrix}0 & 1\\\\-1 & 0\end{bmatrix}$$ + +Let us code this function but let us also take this opportunity to explore the `NumContext` to pass in `A`. +""" + +nbCode: + proc dv(t: float, y: Tensor[float], ctx: NumContext[Tensor[float], float]): Tensor[float] = + ctx.tValues["A"] * y + +nbText: hlMd""" +`NumContext` consists of two tables, `ctx.fValues` which can contain `float`s and `ctx.tValues` which in this case can hold +`Tensor[float]`. So we access `A` which we will insert when creating the context variable: +""" + +nbCode: + var ctx = newNumContext[Tensor[float], float]() + ctx.tValues["A"] = @[@[0.0, 1.0], @[-1.0, 0.0]].toTensor + +nbText: hlMd""" +Now we are ready to setup and run the simulation as we did previously: +""" + +nbCodeInBlock: + let t0 = 0.0 + let tEnd = 20.0 + let y0 = @[@[0.0], @[1.0]].toTensor # initial condition + let tspan = linspace(t0, tEnd, 50) + let odeOptions = newODEoptions(tStart = t0) + + let (t, y) = solveOde(dv, y0, tspan, odeOptions, ctx=ctx, integrator="tsit54") + let yEnd = y[^1][0, 0] + let error = abs(yEnd - sin(tEnd)) + echo "Error: ", error + +nbText: """ +The error is nice and low. 
The error for the other methods are along with the timings: +""" + +for m in concat(fixedODESubset, adaptiveODESubset): + let t0 = 0.0 + let tEnd = 20.0 + let y0 = @[@[0.0], @[1.0]].toTensor # initial condition + let tspan = linspace(t0, tEnd, 50) + let odeOptions = newODEoptions(tStart = t0) + + let (t, y) = solveOde(dv, y0, tspan, odeOptions, ctx=ctx, integrator=m) + let yEnd = y[^1][0, 0] + let error = abs(yEnd - sin(tEnd)) + nbText: &"- {m}: {error:e}" + +block: + let t0 = 0.0 + let tEnd = 20.0 + let y0 = @[@[0.0], @[1.0]].toTensor # initial condition + let tspan = linspace(t0, tEnd, 50) + let odeOptions = newODEoptions(tStart = t0) + nbCode: + for m in concat(fixedODESubset, adaptiveODESubset): + benchy.timeIt m: + keep solveOde(dv, y0, tspan, odeOptions, ctx=ctx, integrator=m) + + nb.blk.code = "" + +nbText: hlMd""" +We can once again see that the high-order adaptive methods are both more accurate and faster than the fixed-order ones. + +## Further reading +- [numericalnim's documentation on ODEs](https://scinim.github.io/numericalnim/numericalnim/ode.html) +""" + +nbSave \ No newline at end of file diff --git a/book/numerical_methods/optimization.nim b/book/numerical_methods/optimization.nim new file mode 100644 index 0000000..7f004f4 --- /dev/null +++ b/book/numerical_methods/optimization.nim @@ -0,0 +1,143 @@ +import nimib except Value +import nimibook +import std / [strformat] + + +nbInit(theme = useNimibook) +nb.useLatex + +nbText: hlMd""" +# Optimize functions in Nim +Optimization is a common task, you have a function, $f(\theta)$ and would like to find the parameters, $\theta_{\text{min}}$, +that either minimize or maximize the function. We will use [numericalnim](https://github.com/SciNim/numericalnim) +in this tutorial to minimize the [Rosenbrock banana 🍌 function](https://en.wikipedia.org/wiki/Rosenbrock_function): + +$$f(x, y) = (1 - x)^2 + 100(y - x^2)^2$$ + +It looks like this with its minimum at $(x, y) = (1, 1)$: +""" +nbCode: + import ggplotnim, numericalnim, benchy, std / [math] + +nbCodeInBlock: + let (x, y) = meshgridFlat(linspace(-2.0, 2.0, 100).toTensor, linspace(-1.0, 3.0, 100).toTensor) + # Log z to reduce its range + let z = log10((1.0 -. x) ^. 2 + 100.0 * (y - x ^. 2) ^. 2) + let df = toDf(x, y, z) + ggplot(df, aes("x", "y", fill="z")) + + geom_raster() + + ggsave("images/banana_function.png") + +nbClearOutput() + +nbImage("images/banana_function.png") + +nbText: hlMd""" +As you can see the function's shape resembles a banana, and the dark spot +on the right-hand side is the minimum. The format that `numericalnim` expects of the function is: +""" +nbCode: + proc f(theta: Tensor[float]): float = + let x = theta[0] + let y = theta[1] + (1.0 - x) ^ 2 + 100.0 * (y - x ^ 2) ^ 2 + +nbText: hlMd""" +where `theta` is a vector containing all the input arguments, `x` and `y` in this case. + +The optimization methods `numericalnim` offers are: +- [steepestDescent](https://en.wikipedia.org/wiki/Gradient_descent) +- [newton](https://en.wikipedia.org/wiki/Newton%27s_method) +- [bfgs](https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm) +- [lbfgs](https://en.wikipedia.org/wiki/Limited-memory_BFGS) + +Now we are ready to try out the different methods, the only thing we need to provide is a start guess. +To make it a bit of a challenge, let us start on the other side of the hill at $(-0.5, 2)$. 
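+As a sanity check, the gradient $\nabla f = \left(-2(1 - x) - 400x(y - x^2),\; 200(y - x^2)\right)$ is exactly zero at $(1, 1)$, so any solver that works should end up there (or very close to it).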
+"""
+
+nbCodeInBlock:
+  let theta0 = [-0.5, 2.0].toTensor()
+  let solutionSteepest = steepestDescent(f, theta0)
+  echo "Steepest: ", solutionSteepest
+  let solutionNewton = newton(f, theta0)
+  echo "Newton: ", solutionNewton
+  let solutionBfgs = bfgs(f, theta0)
+  echo "BFGS: ", solutionBfgs
+  let solutionLbfgs = lbfgs(f, theta0)
+  echo "LBFGS: ", solutionLbfgs
+
+  benchy.timeIt "Steepest":
+    keep steepestDescent(f, theta0)
+  benchy.timeIt "Newton":
+    keep newton(f, theta0)
+  benchy.timeIt "BFGS":
+    keep bfgs(f, theta0)
+  benchy.timeIt "LBFGS":
+    keep lbfgs(f, theta0)
+
+nbText: hlMd"""
+As we can see, Newton, BFGS and LBFGS found the exact solution, and they did it fast.
+Steepest descent didn't find as good a solution and took by far the longest time.
+Steepest descent converges linearly, Newton quadratically, and (L)BFGS
+somewhere in between. So typically Newton will require the fewest steps,
+but each of its steps is also the most expensive to perform. For a small problem like this
+that is not an issue, but say that you have more than a thousand variables in $\theta$,
+then it will start to become slow. Typically LBFGS is a good trade-off between the number of steps
+and the time per step.
+
+## Analytical gradient
+In the example above we did not provide an analytical gradient, so numerical gradients were calculated instead.
+This is typically slower than providing the gradient as a function, as `f` has to be called multiple times
+and the steps are not as accurate. All the above-mentioned methods support analytical gradients.
+To supply one, create a function of the following form:
+"""
+
+nbCode:
+  proc fGradient(theta: Tensor[float]): Tensor[float] =
+    let x = theta[0]
+    let y = theta[1]
+    result = newTensor[float](2)
+    result[0] = -2 * (1 - x) + 100 * 2 * (y - x*x) * -2 * x
+    result[1] = 100 * 2 * (y - x*x)
+
+nbText: hlMd"""
+It takes the parameters as input and returns a Tensor of the same size containing the gradient with respect to each parameter.
+It can then be supplied like this:
+"""
+
+nbCodeInBlock:
+  let theta0 = [-0.5, 2.0].toTensor()
+  let solutionLbfgs = lbfgs(f, theta0, analyticGradient=fGradient)
+  echo "LBFGS (analytic): ", solutionLbfgs
+
+nbText: hlMd"""
+No surprise that it also managed to find the correct solution.
+
+## Options
+There are several options you can provide to the solver. Here's a summary of some of them:
+- `tol`: The error tolerance for when to stop.
+- `alpha`: The step size to use.
+- `fastMode`: Use a lower-order approximation of the derivatives to trade accuracy for speed.
+- `maxIterations`: The maximum number of iterations to run the solver for before stopping.
+- `lineSearchCriterion`: The line search method to use (Armijo, Wolfe, WolfeStrong, NoLineSearch).
+
+These are provided by creating an options object.
Each method has its own initializer, for example: +""" + +nbCodeInBlock: + let theta0 = [-0.5, 2.0].toTensor() + + let steepestOption = steepestDescentOptions[float](alpha=1e-3, fastMode=true) + let steepestSolution = steepestDescent(f, theta0, options=steepestOption) + + let lbfgsOption = lbfgsOptions[float](lineSearchCriterion=Wolfe) + let lbfgsSolution = lbfgs(f, theta0, options=lbfgsOption) + +nbText: hlMd""" +## Further reading +- [numericalnim's documentation on optimization](https://scinim.github.io/numericalnim/numericalnim/optimize.html) +- We also have an article on [curve fitting](https://scinim.github.io/getting-started/numerical_methods/curve_fitting.html) which +allows you to optimize parameters to minimize the error between a curve and data points. +""" + +nbSave \ No newline at end of file diff --git a/getting_started.nim b/getting_started.nim index ed3f027..30c2026 100644 --- a/getting_started.nim +++ b/getting_started.nim @@ -11,6 +11,9 @@ var book = initBookWithToc: section("Numerical methods", "numerical_methods/index.md"): entry("Curve fitting", "curve_fitting") entry("Integration (1D)", "integration1d") + entry("ODEs", "ode") + entry("Optimization", "optimization") + entry("Interpolation", "interpolation") section("Data visualization", "data_viz/index.md"): entry("Plotting data", "plotting_data") section("Interfacing with other language", "external_language_integration/index.md"): diff --git a/scinim_getting_started.nimble b/scinim_getting_started.nimble index 8862e8d..41a7247 100644 --- a/scinim_getting_started.nimble +++ b/scinim_getting_started.nimble @@ -11,7 +11,7 @@ binDir = "bin" requires "nim >= 1.2.0" requires "nimib#head" requires "nimibook#280a626a902745b378cc2186374f14c904c9a606" -requires "ggplotnim >= 0.5.1" +requires "ggplotnim == 0.5.6" requires "datamancer >= 0.2.1" requires "mpfit" requires "numericalnim >= 0.7.1"