diff --git a/Project.toml b/Project.toml index ce6dc1f8..b71f2ba2 100644 --- a/Project.toml +++ b/Project.toml @@ -20,6 +20,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" @@ -43,6 +44,7 @@ Pkg = "1.11.0" Plots = "1.40.15" Printf = "1.11.0" SparseArrays = "1.11.0" +Random = "1.11.0" Roots = "2.2.13" SpecialFunctions = "2.5.1" StaticArrays = "1.9.15" diff --git a/benchmarks/benchmark_delta_prime_methods.jl b/benchmarks/benchmark_delta_prime_methods.jl new file mode 100644 index 00000000..704763f4 --- /dev/null +++ b/benchmarks/benchmark_delta_prime_methods.jl @@ -0,0 +1,95 @@ +# Sanity check: compute_delta_prime_from_ca! vs inline Δ' from riccati_cross_ideal_singular_surf! +# +# riccati_cross_ideal_singular_surf! computes Δ' inline at each singular surface crossing +# using the diagonal formula (no Gaussian reduction permutation): +# Δ'[s] = (ca_r[ipert_res, ipert_res, 2, s] - ca_l[ipert_res, ipert_res, 2, s]) / (4π²·ψ₀) +# +# compute_delta_prime_from_ca! applies the identical formula post-hoc from the stored +# ca_l/ca_r arrays. Since both operate on the same data with the same formula, results +# should match to floating-point precision (not just approximately — exactly). +# +# This verifies that compute_delta_prime_from_ca! is a correct standalone implementation +# of the Δ' formula that can be used for testing or alternative integration drivers. +# +# Usage (from JPEC_main root): +# julia --project=. benchmarks/benchmark_delta_prime_methods.jl + +using LinearAlgebra, Printf, TOML +using GeneralizedPerturbedEquilibrium + +const FFS = GeneralizedPerturbedEquilibrium.ForceFreeStates + +function setup_and_run_solovev() + ex = joinpath(@__DIR__, "..", "test", "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + inputs["ForceFreeStates"]["use_riccati"] = true + intr = FFS.ForceFreeStatesInternal(; dir_path=ex) + ctrl = FFS.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + FFS.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + FFS.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = FFS.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = FFS.make_matrix(equil, intr, metric) + odet = FFS.riccati_eulerlagrange_integration(ctrl, equil, ffit, intr) + return ctrl, equil, ffit, intr, odet +end + +println("\n=== compute_delta_prime_from_ca! consistency check ===") +println("Verifies the standalone Δ' formula matches the inline Riccati crossing computation.") +println("Expected error: exactly zero (same formula, same data).\n") + +ctrl, equil, ffit, intr, odet = setup_and_run_solovev() +msing = intr.msing + +# Capture Δ' values set inline by riccati_cross_ideal_singular_surf! during integration +delta_prime_inline = [copy(intr.sing[s].delta_prime) for s in 1:msing] + +# Now call compute_delta_prime_from_ca! — it reads the same ca_l/ca_r arrays and +# overwrites intr.sing[s].delta_prime using the identical diagonal formula +FFS.compute_delta_prime_from_ca!(odet, intr, equil) + +println(" N=$(intr.numpert_total) modes, $msing singular surfaces\n") +@printf(" %6s %4s %4s %22s %22s %12s\n", + "Surf", "m", "n", "Δ' (inline)", "Δ' (from_ca)", "abs diff") +println(" " * "-"^76) + +max_absdiff = let max_absdiff = 0.0 + for s in 1:msing + sing = intr.sing[s] + dp_from_ca = intr.sing[s].delta_prime + for i in eachindex(delta_prime_inline[s]) + dp_il = delta_prime_inline[s][i] + dp_fc = dp_from_ca[i] + absdiff = abs(dp_fc - dp_il) + max_absdiff = max(max_absdiff, absdiff) + @printf(" %6d %4d %4d %22.6f%+.6fi %22.6f%+.6fi %12.4e\n", + s, sing.m[i], sing.n[i], + real(dp_il), imag(dp_il), + real(dp_fc), imag(dp_fc), + absdiff) + end + end + max_absdiff +end + +println() +if max_absdiff == 0.0 + println("PASSED — Δ' values are bit-for-bit identical (max abs diff = 0.0)") +elseif max_absdiff < 1e-14 + @printf("PASSED — max abs diff = %.2e (floating-point rounding only)\n", max_absdiff) +else + @printf("FAILED — max abs diff = %.2e (expected exact agreement)\n", max_absdiff) + exit(1) +end +println() diff --git a/benchmarks/benchmark_riccati_der.jl b/benchmarks/benchmark_riccati_der.jl new file mode 100644 index 00000000..f751588f --- /dev/null +++ b/benchmarks/benchmark_riccati_der.jl @@ -0,0 +1,131 @@ +# Sanity check: riccati_der! correctly evaluates the explicit Riccati ODE. +# +# riccati_der! implements [Glasser 2018 Phys. Plasmas 25, 032507, Eq. 19]: +# dS/dψ = w†·F̄⁻¹·w - S·Ḡ·S, w = Q - K̄·S +# +# where Q = diag(1/(m - n·q)), F̄ = L·L† (Cholesky), K̄ and Ḡ are the MHD +# metric matrices evaluated at ψ. +# +# NOTE: The identity between this Riccati ODE and the EL chain rule +# dS/dψ = dU₁·U₂⁻¹ - S·dU₂·U₂⁻¹ +# holds ONLY for Hermitian S (physical states evolved from the axis, where +# S†=S is preserved by the EL symmetry). For arbitrary non-Hermitian (U₁, U₂), +# the two expressions differ — so this script compares riccati_der! against the +# explicit formula rather than against sing_der!. +# +# Usage (from JPEC_main root): +# julia --project=. benchmarks/benchmark_riccati_der.jl + +using LinearAlgebra, Random, Printf, TOML +using GeneralizedPerturbedEquilibrium + +const FFS = GeneralizedPerturbedEquilibrium.ForceFreeStates + +function setup_solovev() + ex = joinpath(@__DIR__, "..", "test", "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + intr = FFS.ForceFreeStatesInternal(; dir_path=ex) + ctrl = FFS.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + FFS.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + FFS.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = FFS.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = FFS.make_matrix(equil, intr, metric) + return ctrl, equil, ffit, intr +end + +# Evaluate the Riccati RHS explicitly from splines: dS = w†·F̄⁻¹·w - S·Ḡ·S +function riccati_rhs_manual(S, psi, equil, ffit, intr) + N = intr.numpert_total + L = zeros(ComplexF64, N, N) + Kmat = zeros(ComplexF64, N, N) + Gmat = zeros(ComplexF64, N, N) + ffit.fmats_lower(vec(L), psi; hint=ffit._hint) + ffit.kmats(vec(Kmat), psi; hint=ffit._hint) + ffit.gmats(vec(Gmat), psi; hint=ffit._hint) + + q = equil.profiles.q_spline(psi) + singfac = vec(1.0 ./ ((intr.mlow:intr.mhigh) .- q .* (intr.nlow:intr.nhigh)')) + + # w = Q - K̄·S (Q is diagonal; add only the diagonal entries) + w = -Kmat * S + for i in 1:N + w[i, i] += singfac[i] + end + + # v = F̄⁻¹·w via stored Cholesky factor L (L·L† = F̄) + v = copy(w) + ldiv!(LowerTriangular(L), v) + ldiv!(UpperTriangular(L'), v) + + return adjoint(w) * v - S * Gmat * S +end + +println("\n=== riccati_der! formula verification ===") +println("Verifies riccati_der! output matches manual evaluation of Glasser 2018 Eq. 19.") +println("Test state: Hermitian S (physical constraint). Expected error: ~machine epsilon.\n") + +ctrl, equil, ffit, intr = setup_solovev() +N = intr.numpert_total + +odet = FFS.OdeState(N, ctrl.numsteps_init, ctrl.numunorms_init, intr.msing) +FFS.initialize_el_at_axis!(odet, ctrl, equil.profiles, intr) +chunks = FFS.chunk_el_integration_bounds(odet, ctrl, intr) + +# 30% into each chunk: well inside the interval, away from singularities at psi_end +test_psis = [c.psi_start + 0.3 * (c.psi_end - c.psi_start) for c in chunks] + +println(" N=$N modes, $(length(test_psis)) test ψ points (30% into each chunk)\n") +@printf(" %8s %14s %14s %12s\n", "ψ", "‖dS_manual‖", "‖dS_ric‖", "rel error") +println(" " * "-"^54) + +rng = Random.MersenneTwister(42) +threshold = 1e-10 + +max_err = let max_err = 0.0 + for psi in test_psis + # Hermitian S: physical Riccati matrix is Hermitian (preserved by EL symmetry) + A = randn(rng, ComplexF64, N, N) + S = (A + A') / 2 # Hermitian by construction + + # Manual RHS + dS_manual = riccati_rhs_manual(S, psi, equil, ffit, intr) + + # riccati_der! RHS + u_ric = zeros(ComplexF64, N, N, 2) + du_ric = zeros(ComplexF64, N, N, 2) + u_ric[:, :, 1] .= S + u_ric[:, :, 2] .= Matrix{ComplexF64}(I, N, N) + dummy_chunk = FFS.IntegrationChunk(psi, psi, false, 0, 1) + params = (ctrl, equil, ffit, intr, odet, dummy_chunk) + FFS.riccati_der!(du_ric, u_ric, params, psi) + dS_ric = du_ric[:, :, 1] + + ref = max(norm(dS_manual), 1e-10) + err = norm(dS_ric - dS_manual) / ref + max_err = max(max_err, err) + status = err < threshold ? "" : " ← FAIL" + @printf(" %8.4f %14.4e %14.4e %12.4e%s\n", psi, norm(dS_manual), norm(dS_ric), err, status) + end + max_err +end + +println() +if max_err < threshold + @printf("PASSED — max rel error = %.2e (threshold %.0e)\n", max_err, threshold) +else + @printf("FAILED — max rel error = %.2e exceeds threshold %.0e\n", max_err, threshold) + exit(1) +end +println() diff --git a/benchmarks/benchmark_threads.jl b/benchmarks/benchmark_threads.jl new file mode 100644 index 00000000..1c8b4c4c --- /dev/null +++ b/benchmarks/benchmark_threads.jl @@ -0,0 +1,76 @@ +# Thread-scaling benchmark for the bidirectional parallel FM integration. +# Runs the Solovev (N=8) and DIIID-like (N=26) examples with use_parallel=true +# across 1, 2, 4, 8 threads and compares against the serial Riccati path. +# +# Usage (from JPEC_main root): +# for t in 1 2 4 8; do julia -t $t --project=. benchmarks/benchmark_threads.jl; done + +using GeneralizedPerturbedEquilibrium, TOML, Printf, Statistics + +function run_ffs(ex; use_parallel, use_riccati=false) + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + inputs["ForceFreeStates"]["use_parallel"] = use_parallel + inputs["ForceFreeStates"]["use_riccati"] = use_riccati + inputs["ForceFreeStates"]["write_outputs_to_HDF5"] = false + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_matrix(equil, intr, metric) + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.eulerlagrange_integration(ctrl, equil, ffit, intr) + vac = GeneralizedPerturbedEquilibrium.ForceFreeStates.free_run!(odet, ctrl, equil, ffit, intr) + return real(vac.et[1]), intr.numpert_total +end + +function timed_run(ex; use_parallel, use_riccati=false, nwarm=1, nrep=2) + # Warmup + for _ in 1:nwarm + run_ffs(ex; use_parallel, use_riccati) + end + # Timed runs + times = Float64[] + local et1, N + for _ in 1:nrep + t0 = time() + et1, N = run_ffs(ex; use_parallel, use_riccati) + push!(times, time() - t0) + end + return mean(times), et1, N +end + +nthreads = Threads.nthreads() +root = joinpath(@__DIR__, "..") +sol_ex = joinpath(root, "test", "test_data", "regression_solovev_ideal_example") +diiid_ex = joinpath(root, "examples", "DIIID-like_ideal_example") + +println("\n=== Thread-scaling benchmark ($(nthreads) thread(s)) ===\n") + +for (label, ex) in [("Solovev", sol_ex), ("DIIID-like", diiid_ex)] + t_std, et_std, N = timed_run(ex; use_parallel=false, use_riccati=false) + t_ric, et_ric, _ = timed_run(ex; use_parallel=false, use_riccati=true) + t_par, et_par, _ = timed_run(ex; use_parallel=true, use_riccati=false) + + err_ric = abs(et_ric - et_std) / abs(et_std) * 100 + err_par = abs(et_par - et_std) / abs(et_std) * 100 + + println("$label (N=$N, nthreads=$nthreads)") + @printf(" standard et[1]=%.5f t=%.2fs speedup=1.00×\n", et_std, t_std) + @printf(" riccati et[1]=%.5f t=%.2fs speedup=%.2f× err=%.4f%%\n", + et_ric, t_ric, t_std/t_ric, err_ric) + @printf(" parallel et[1]=%.5f t=%.2fs speedup=%.2f× err=%.4f%%\n", + et_par, t_par, t_std/t_par, err_par) + println() +end diff --git a/docs/make.jl b/docs/make.jl index 3ab4649a..0a801037 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -26,6 +26,7 @@ makedocs(; "API Reference" => [ "Vacuum" => "vacuum.md", "Equilibrium" => "equilibrium.md", + "Stability Analysis" => "stability.md", "Utilities" => "utilities.md", "Forcing Terms" => "forcing_terms.md", "Perturbed Equilibrium" => "perturbed_equilibrium.md" diff --git a/docs/src/equilibrium.md b/docs/src/equilibrium.md index bbd5aa4c..2353facf 100644 --- a/docs/src/equilibrium.md +++ b/docs/src/equilibrium.md @@ -104,6 +104,7 @@ Notes: ## See also +- `docs/src/stability.md` — ideal MHD stability analysis built on top of the equilibrium - `docs/src/splines.md` — spline helpers used by equilibrium routines - `docs/src/vacuum.md` — coupling between equilibrium and vacuum solvers diff --git a/docs/src/stability.md b/docs/src/stability.md new file mode 100644 index 00000000..59bc7136 --- /dev/null +++ b/docs/src/stability.md @@ -0,0 +1,304 @@ +# Ideal MHD Stability (ForceFreeStates) + +The `ForceFreeStates` module implements ideal MHD stability analysis for axisymmetric toroidal +plasmas following the direct Newcomb criterion described in [Glasser 2016]. It solves the +Euler-Lagrange (EL) system derived from the potential energy functional, identifies singular +(rational) surfaces where resonant coupling occurs, and returns eigenmode energies, the +tearing stability parameters Δ', and the full inter-surface Δ' matrix. + +## Physical background + +Ideal MHD stability is determined by the sign of the perturbed potential energy + +```math +\delta W[\xi] = \int_0^{\psi_\mathrm{lim}} \mathcal{F}(\xi, \xi') \, d\psi, +``` + +where ``\xi(\psi)`` is the poloidal displacement vector. The extremum of ``\delta W`` over all +admissible ``\xi`` satisfies the Euler-Lagrange system [Glasser 2016, Eq. 24]: + +```math +\frac{d}{d\psi} +\begin{pmatrix} U_1 \\ U_2 \end{pmatrix} += +\begin{pmatrix} A & B \\ C & D \end{pmatrix} +\begin{pmatrix} U_1 \\ U_2 \end{pmatrix}, +\quad +A = -Q\bar{F}^{-1}\bar{K}, \; +B = Q\bar{F}^{-1}Q, \; +C = \bar{G} - \bar{K}^\dagger\bar{F}^{-1}\bar{K}, \; +D = \bar{K}^\dagger\bar{F}^{-1}Q, +``` + +where ``\bar{F}``, ``\bar{K}``, ``\bar{G}`` are the MHD metric matrices in Fourier-mode space +and ``Q = \mathrm{diag}(1/(m - nq))`` is the singular factor. The Newcomb criterion states +that the plasma is stable if and only if this system admits a regular solution that remains +finite across every rational surface. + +**Key references** + +| Paper | Content | +|-------|---------| +| [Glasser 2016] Phys. Plasmas **23**, 112506 | Newcomb criterion, EL system, standard DCON integration | +| [Glasser 2018a] Phys. Plasmas **25**, 032507 | Riccati reformulation, reduced stiffness near singular surfaces | +| [Glasser 2018b] Phys. Plasmas **25**, 032501 | STRIDE code: parallel FM integration, inter-surface Δ' matrix | + +## Integration methods + +Three integration drivers are available, all solving the same EL system but with different +numerical strategies. + +### Standard integration + +`eulerlagrange_integration` is the baseline driver. It integrates the EL ODE directly in +``(U_1, U_2)`` using Tsit5 with adaptive step control. Near each rational surface the +columns of ``U_2`` that correspond to resonant modes are zeroed via Gaussian reduction (GR), +keeping the solution bounded. This is the reference path for correctness comparisons. + +Enable with (default): +```toml +[ForceFreeStates] +use_riccati = false +use_parallel = false +``` + +### Riccati integration + +`riccati_eulerlagrange_integration` reformulates the problem in terms of the dual Riccati +matrix ``S = U_1 \cdot U_2^{-1}`` [Glasser 2018a, Eq. 19]: + +```math +\frac{dS}{d\psi} = w^\dagger \bar{F}^{-1} w - S\bar{G}S, \qquad +w = Q - \bar{K}S. +``` + +``S`` remains bounded near rational surfaces (where ``U_1, U_2`` grow exponentially), so the +solver takes fewer steps. Rather than integrating the quadratic Riccati ODE directly (which +blows up when ``|S|`` is large), the code integrates the linear EL system with +`sing_der!` as the RHS and recovers ``S = U_1 U_2^{-1}`` via periodic renormalization — an +approach that is mathematically equivalent to O(Δψ) but uses the ODE solver's full 5th-order +accuracy. + +Renormalization is triggered whenever ``\max(|U_1|)`` or ``\max(|U_2|)`` exceeds the +threshold `ucrit` (default 1e6), and is forced at the end of each chunk. At singular surface +crossings, `riccati_cross_ideal_singular_surf!` applies the small-asymptotic matching +directly in column `ipert_res` — without Gaussian reduction — and renormalizes to ``(S, I)``. + +Enable with: +```toml +[ForceFreeStates] +use_riccati = true +use_parallel = false +``` + +**Speedup** (benchmarked on reference examples): + +| Example | N modes | Speedup vs standard | +|---------|---------|---------------------| +| Solovev | 8 | ~1.6× (1 thread), ~2.8× (4 threads) | +| DIIID | 26 | ~2.0× (1 thread), ~1.3× (4 threads) | + +### Parallel fundamental-matrix (FM) integration + +`parallel_eulerlagrange_integration` decomposes the radial domain into independent chunks and +integrates each chunk in parallel using `Threads.@threads`. Each chunk produces a +fundamental-matrix (FM) propagator. Serial post-processing multiplies the propagators in +order and applies each singular-surface crossing, recovering the same EL trajectory as the +Riccati path. + +#### Bidirectional integration for large N + +For large mode counts the FM propagator for a chunk ending near a rational surface is +ill-conditioned: the EL solutions grow exponentially toward the rational surface, so the +forward FM amplifies numerical errors. GPEC follows the STRIDE approach [Glasser 2018b, +Sec. III.A]: the crossing chunk (the last sub-chunk before each rational surface) is +integrated *backward* — from the rational surface toward the interior — producing a +well-conditioned backward FM ``\Phi_L``. The forward propagation is recovered as +``\Phi_L^{-1}`` via an LU solve in serial assembly, which is accurate precisely because +``\Phi_L`` is well-conditioned. + +The implementation uses a `direction` field on `IntegrationChunk`: + +- `direction = +1`: standard forward integration, `tspan = (ψ_start, ψ_end)`. +- `direction = -1`: backward integration, `tspan = (ψ_end, ψ_start)` (reversed). + +`chunk_el_integration_bounds(...; bidirectional=true)` assigns `direction = -1` to every +crossing chunk. `balance_integration_chunks` preserves this: the sub-chunk closest to the +rational surface inherits `direction`, while the earlier sub-chunk always gets `direction=+1`. + +Enable with: +```toml +[ForceFreeStates] +use_parallel = true +``` + +**Accuracy** (N=26, DIIID-like example): energy eigenvalue within 2% of standard path. +The residual ~2% gap comes from the different crossing convention (Riccati-style direct +zeroing vs GR), not from ODE tolerance; it is present in both 1-thread and 4-thread runs. + +## Δ' tearing stability parameter + +### Per-surface Δ' (`delta_prime`) + +At each rational surface the asymptotic matching condition gives the tearing stability +parameter [Glasser 2016]: + +```math +\Delta'_s = \frac{c_{a,r}[i_s,i_s,2] - c_{a,l}[i_s,i_s,2]}{4\pi^2 \psi_0}, +``` + +where ``c_{a,l}`` and ``c_{a,r}`` are the left and right asymptotic coefficients at surface +``s``, and ``i_s`` is the column index of the resonant mode. Positive ``\Delta' > 0`` +indicates a tearing-unstable surface. + +The Riccati and parallel FM paths populate `intr.sing[s].delta_prime` (a length-``n_\mathrm{res}`` +vector) inline during each crossing. A companion vector `delta_prime_col` (length N) stores +the coupling of all poloidal modes to the resonant mode at surface ``s``: + +```math +(\Delta'_\mathrm{col})_{j,i} = \frac{c_{a,r}[j,i_s,2] - c_{a,l}[j,i_s,2]}{4\pi^2 \psi_0}. +``` + +The diagonal element ``(\Delta'_\mathrm{col})_{i_s,i}`` equals `delta_prime[i]` exactly by +construction. + +### Inter-surface Δ' matrix (`delta_prime_matrix`) + +`compute_delta_prime_matrix!` assembles the full ``2 m_\mathrm{sing} \times 2 m_\mathrm{sing}`` +inter-surface tearing matrix following the STRIDE global BVP [Glasser 2018b, Sec. III.B]. +The BVP unknowns are the plasma state at the left and right inner-layer boundaries of every +rational surface; the driving terms are unit-amplitude asymptotic solutions at each boundary. +The resulting matrix encodes the full plasma response between all pairs of surfaces and is +required for resistive stability analysis of multi-surface configurations. + +The BVP is well-conditioned because it is formulated using the split ``(\Phi_R, \Phi_L)`` +propagator blocks from bidirectional integration rather than the monolithic forward product +``\Phi_L^{-1} \Phi_R`` (which is ill-conditioned for large N): + +```math +\Phi_R[j] \cdot x_R[j-1] - \Phi_L[j] \cdot x_L[j] = 0 +\quad \text{(junction at } \psi_m[j]\text{)}, +``` + +where ``\Phi_R[j]`` is the forward FM product from ``\psi_{R,j-1}`` to the junction, and +``\Phi_L[j]`` is the backward crossing FM from ``\psi_{L,j}`` to the junction. + +The matrix is only populated by the parallel FM path and is written to the HDF5 output +under `singular/delta_prime_matrix`. + +## Configuration reference + +All `ForceFreeStates` options are set in the `[ForceFreeStates]` section of `gpec.toml`. + +```toml +[ForceFreeStates] +# Integration driver +use_riccati = false # true: Riccati path (faster, same accuracy) +use_parallel = false # true: parallel FM path (multi-thread, large N) + +# Mode space +nn_low = 1 # lowest toroidal mode number +nn_high = 1 # highest toroidal mode number +delta_mlow = 0 # extra low poloidal modes (m < mlow) +delta_mhigh = 0 # extra high poloidal modes (m > mhigh) + +# ODE solver +numsteps_init = 200 # initial step budget per chunk +numunorms_init = 50 # renorm checkpoint budget +reltol = 1e-6 # ODE relative tolerance + +# Output +verbose = true +write_outputs_to_HDF5 = true +``` + +The number of Julia threads is controlled at startup via `-t N` or the `JULIA_NUM_THREADS` +environment variable; it is not a runtime parameter. + +## API Reference + +```@autodocs +Modules = [GeneralizedPerturbedEquilibrium.ForceFreeStates] +``` + +## Example usage + +### Run stability analysis from a TOML configuration + +```julia +using GeneralizedPerturbedEquilibrium, TOML + +const FFS = GeneralizedPerturbedEquilibrium.ForceFreeStates + +ex = "examples/Solovev_ideal_example" +inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + +ctrl = FFS.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) +equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium( + GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex)) + +intr = FFS.ForceFreeStatesInternal(; dir_path=ex) +intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) +FFS.sing_lim!(intr, ctrl, equil) +intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 +FFS.sing_find!(intr, equil) +intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow +intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh +intr.mpert = intr.mhigh - intr.mlow + 1 +intr.mband = intr.mpert - 1 +intr.numpert_total = intr.mpert * intr.npert + +metric = FFS.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) +ffit = FFS.make_matrix(equil, intr, metric) + +# Choose integration driver +odet = ctrl.use_parallel ? FFS.parallel_eulerlagrange_integration(ctrl, equil, ffit, intr) : + ctrl.use_riccati ? FFS.riccati_eulerlagrange_integration(ctrl, equil, ffit, intr) : + FFS.eulerlagrange_integration(ctrl, equil, ffit, intr) + +vac = FFS.free_run!(odet, ctrl, equil, ffit, intr) +println("Energy eigenvalue et[1] = ", real(vac.et[1])) +``` + +### Inspect Δ' at singular surfaces + +```julia +for s in 1:intr.msing + sing = intr.sing[s] + println("Surface $s: ψ = $(sing.psi_s), m/n = $(sing.m[1])/$(sing.n[1])") + println(" Δ' = $(real(sing.delta_prime[1]))") +end +``` + +### Access inter-surface Δ' matrix (parallel FM path) + +```julia +# intr.delta_prime_matrix is 2·msing × 2·msing after parallel_eulerlagrange_integration +dpm = intr.delta_prime_matrix +println("Δ' matrix size: ", size(dpm)) +println("Diagonal (surface response to self-driving):") +for j in 1:intr.msing + println(" Surface $j left: ", real(dpm[2j-1, 2j-1])) + println(" Surface $j right: ", real(dpm[2j, 2j ])) +end +``` + +## Notes + +- The standard path does not populate `delta_prime`; use `PerturbedEquilibrium.SingularCoupling` + for Δ' on the standard path (it reads `ca_l`/`ca_r` directly). +- The Riccati and parallel FM paths compute Δ' inline at each crossing, using the + direct diagonal formula (no GR permutation). The result in `delta_prime_col[ipert_res, i]` + equals `delta_prime[i]` to machine precision. +- `delta_prime_matrix` contains raw BVP coefficients, not asymptotic-normalized values; + its diagonal elements do **not** in general equal `delta_prime`. +- ODE step counts depend on the equilibrium profile and mode count; the `numsteps_init` + parameter sets the initial allocation but the solver adapts automatically. + +## See also + +- `docs/src/equilibrium.md` — build the `PlasmaEquilibrium` object required by this module +- `docs/src/vacuum.md` — vacuum response computed from the EL solution in `free_run!` +- `docs/src/perturbed_equilibrium.md` — downstream singular coupling analysis using Δ' diff --git a/src/ForceFreeStates/EulerLagrange.jl b/src/ForceFreeStates/EulerLagrange.jl index 20b2ed39..9871f145 100644 --- a/src/ForceFreeStates/EulerLagrange.jl +++ b/src/ForceFreeStates/EulerLagrange.jl @@ -1,3 +1,141 @@ +""" + compute_delta_prime_from_ca!(odet, intr, equil) + +Compute the tearing stability parameter Δ' for each singular surface from the +asymptotic coefficients `ca_l` and `ca_r` accumulated during integration. + +Uses the diagonal formula Δ'[i] = (ca_r[i,i,2,s] - ca_l[i,i,2,s]) / (4π² · psio), +which is correct when the small asymptotic was introduced in column `ipert_res` directly +(no GR permutation). + +**Note**: This function is no longer called from any integration driver. Δ' is now computed +inline inside each crossing function where the correct column index is known: +- `cross_ideal_singular_surf!` uses `perm_col` (GR-permuted column) +- `riccati_cross_ideal_singular_surf!` uses the diagonal `ipert_res` (no GR permutation) + +Retained for reference and potential use in testing. + +This matches the formula in `PerturbedEquilibrium/SingularCoupling.jl` (lines ~197): + `delta_prime_val = (rbwp1 - lbwp1) / (twopi * chi1)` +with `chi1 = 2π·psio`, so the denominators are identical. +""" +function compute_delta_prime_from_ca!(odet::OdeState, intr::ForceFreeStatesInternal, equil::Equilibrium.PlasmaEquilibrium) + denom = (2π)^2 * equil.psio # = twopi * chi1 in SingularCoupling.jl + for s in 1:intr.msing + sing = intr.sing[s] + n_modes = length(sing.m) + resize!(intr.sing[s].delta_prime, n_modes) + for i in 1:n_modes + ipert_res = 1 + sing.m[i] - intr.mlow + (sing.n[i] - intr.nlow) * intr.mpert + if 1 <= ipert_res <= intr.numpert_total + Δca = odet.ca_r[ipert_res, ipert_res, 2, s] - odet.ca_l[ipert_res, ipert_res, 2, s] + intr.sing[s].delta_prime[i] = Δca / denom + else + intr.sing[s].delta_prime[i] = 0.0 + 0.0im + end + end + end +end + +""" + ode_itime_cost(psi1, psi2, intr) -> Float64 + +Estimate the relative ODE integration cost for the interval [ψ₁, ψ₂] using the +empirical log-divergent cost model from STRIDE (Glasser 2018). + +The cost is a sum of logarithmic contributions from reference points: + - Magnetic axis (ψ_ref = 0): steep divergence, (a,b) = (39695, 212830) + - Each rational surface (ψ_ref = ψ_s): moderate divergence, (a,b) = (17147, 470710) + - Edge (ψ_ref = ψ_lim): mild divergence, (a,b) = (1646, 4683) + +For each reference: cost += (a/b) * |log(1 + b|ψ₂-ref|) - log(1 + b|ψ₁-ref|)| + +The cost model is additive for sub-intervals not containing rational surfaces, +which makes it suitable for equal-cost splitting via bisection. +""" +function ode_itime_cost(psi1::Float64, psi2::Float64, intr::ForceFreeStatesInternal) + a_ax, b_ax = 39695.0, 212830.0 + a_rat, b_rat = 17147.0, 470710.0 + a_edge, b_edge = 1646.0, 4683.0 + + cost = (a_ax / b_ax) * abs(log(1.0 + b_ax * abs(psi2)) - log(1.0 + b_ax * abs(psi1))) + + for sing in intr.sing + ref = sing.psifac + cost += (a_rat / b_rat) * abs(log(1.0 + b_rat * abs(psi2 - ref)) - log(1.0 + b_rat * abs(psi1 - ref))) + end + + ref_edge = intr.psilim + cost += (a_edge / b_edge) * abs(log(1.0 + b_edge * abs(psi2 - ref_edge)) - log(1.0 + b_edge * abs(psi1 - ref_edge))) + + return cost +end + +""" + balance_integration_chunks(chunks, ctrl, intr) -> Vector{IntegrationChunk} + +Sub-divide integration chunks to produce a load-balanced set for parallel execution. +Starts from the output of `chunk_el_integration_bounds` and iteratively splits the +highest-cost chunk (by `ode_itime_cost`) until the total chunk count reaches +`max(2*msing + 3, 4 * Threads.nthreads())`. + +Each split finds the equal-cost midpoint ψ_mid via bisection: + ode_itime_cost(psi_start, psi_mid) ≈ ode_itime_cost(psi_start, psi_end) / 2 + +Sub-chunks inherit `needs_crossing=false` and `ising=0`. Only the LAST sub-chunk of +each original chunk retains `needs_crossing=true` and the original `ising`, so the +rational surface crossing still fires at the correct ψ in the serial assembly phase. +""" +function balance_integration_chunks(chunks::Vector{IntegrationChunk}, ctrl::ForceFreeStatesControl, intr::ForceFreeStatesInternal) + min_chunks = 2 * intr.msing + 3 + target_n = max(min_chunks, 4 * Threads.nthreads()) + + result = collect(chunks) + + while length(result) < target_n + # Find the highest-cost splittable chunk + best_idx = 0 + best_cost = -Inf + for (i, chunk) in enumerate(result) + width = chunk.psi_end - chunk.psi_start + if width > 1e-8 + c = ode_itime_cost(chunk.psi_start, chunk.psi_end, intr) + if c > best_cost + best_cost = c + best_idx = i + end + end + end + + best_idx == 0 && break # No more splittable chunks + + chunk = result[best_idx] + total_cost = best_cost + target_cost = total_cost / 2.0 + + # Bisect to find ψ_mid where cost(psi_start, ψ_mid) ≈ target_cost + lo, hi = chunk.psi_start, chunk.psi_end + for _ in 1:50 + mid = (lo + hi) / 2.0 + if ode_itime_cost(chunk.psi_start, mid, intr) < target_cost + lo = mid + else + hi = mid + end + end + psi_mid = (lo + hi) / 2.0 + + left = IntegrationChunk(; psi_start=chunk.psi_start, psi_end=psi_mid, + needs_crossing=false, ising=0, direction=1) + right = IntegrationChunk(; psi_start=psi_mid, psi_end=chunk.psi_end, + needs_crossing=chunk.needs_crossing, ising=chunk.ising, + direction=chunk.direction) + splice!(result, best_idx, [left, right]) + end + + return result +end + """ eulerlagrange_integration(ctrl::ForceFreeStatesControl, equil::Equilibrium.PlasmaEquilibrium, ffit::FourFitVars, intr::ForceFreeStatesInternal) @@ -22,6 +160,13 @@ An OdeState struct containing the final state of the ODE solver after integratio """ function eulerlagrange_integration(ctrl::ForceFreeStatesControl, equil::Equilibrium.PlasmaEquilibrium, ffit::FourFitVars, intr::ForceFreeStatesInternal) + # Dispatch to parallel or Riccati solver if requested + if ctrl.use_parallel + return parallel_eulerlagrange_integration(ctrl, equil, ffit, intr) + elseif ctrl.use_riccati + return riccati_eulerlagrange_integration(ctrl, equil, ffit, intr) + end + # Initialization odet = OdeState(intr.numpert_total, ctrl.numsteps_init, ctrl.numunorms_init, intr.msing) if ctrl.sing_start <= 0 @@ -168,7 +313,7 @@ making the integration flow more predictable and easier to parallelize (e.g., fo Support for `kin_flag` """ -function chunk_el_integration_bounds(odet::OdeState, ctrl::ForceFreeStatesControl, intr::ForceFreeStatesInternal) +function chunk_el_integration_bounds(odet::OdeState, ctrl::ForceFreeStatesControl, intr::ForceFreeStatesInternal; bidirectional::Bool=false) chunks = IntegrationChunk[] # Start from current position @@ -207,7 +352,8 @@ function chunk_el_integration_bounds(odet::OdeState, ctrl::ForceFreeStatesContro psi_start=psi_current, psi_end=psi_end, needs_crossing=true, - ising=ising_current + ising=ising_current, + direction = bidirectional ? -1 : 1 )) # After crossing, we jump to the other side of the singular surface @@ -278,7 +424,7 @@ function cross_ideal_singular_surf!(odet::OdeState, ctrl::ForceFreeStatesControl end # Re-initialize on opposite side of rational surface by approximating solution - params = (ctrl, equil, ffit, intr, odet, IntegrationChunk(0.0, 0.0, false, ising)) + params = (ctrl, equil, ffit, intr, odet, IntegrationChunk(0.0, 0.0, false, ising, 1)) du1 = zeros(ComplexF64, intr.numpert_total, intr.numpert_total, 2) du2 = zeros(ComplexF64, intr.numpert_total, intr.numpert_total, 2) sing_der!(du1, odet.u, params, odet.psifac) @@ -299,6 +445,15 @@ function cross_ideal_singular_surf!(odet::OdeState, ctrl::ForceFreeStatesControl # Get asymptotic coefficients after crossing rational surface odet.ca_r[:, :, :, ising] .= sing_get_ca(odet.u, ua, intr) + # Note: Δ' is NOT computed for the standard path. The physical Δ' is a complex + # normalization-convention-dependent quantity: the correct value requires the solution + # columns to be in the Riccati gauge (U₂=I), which is maintained by the Riccati + # renormalization. The standard path's solution columns grow from the axis with an + # arbitrary complex phase; dividing by the outer asymptotic coefficient normalizes the + # magnitude but not the complex phase, so the result is in a different convention. + # Δ' is computed inline in riccati_cross_ideal_singular_surf! for the Riccati and + # parallel FM paths, where the renormalization convention is consistent. + # Store values after crossing step and advance odet.psi_store[odet.step] = odet.psifac odet.q_store[odet.step] = odet.q diff --git a/src/ForceFreeStates/ForceFreeStates.jl b/src/ForceFreeStates/ForceFreeStates.jl index 7d580322..859c4067 100644 --- a/src/ForceFreeStates/ForceFreeStates.jl +++ b/src/ForceFreeStates/ForceFreeStates.jl @@ -29,6 +29,7 @@ include("Fourfit.jl") include("FixedBoundaryStability.jl") include("Utils.jl") include("Free.jl") +include("Riccati.jl") # These are used for various small tolerances and root finders throughout ForceFreeStates global eps = 1e-10 diff --git a/src/ForceFreeStates/ForceFreeStatesStructs.jl b/src/ForceFreeStates/ForceFreeStatesStructs.jl index d96c0cbe..6e9f2de1 100644 --- a/src/ForceFreeStates/ForceFreeStatesStructs.jl +++ b/src/ForceFreeStates/ForceFreeStatesStructs.jl @@ -13,6 +13,13 @@ A mutable struct holding data related to the singular surfaces in the equilibriu - `q1::Float64` - Derivative of safety factor with respect to ψ - `grri::Array{Float64,2}` - Interior Green's function at this surface [2*mthvac, 2*mpert] - `grre::Array{Float64,2}` - Exterior Green's function at this surface [2*mthvac, 2*mpert] + - `delta_prime::Vector{ComplexF64}` - Tearing stability Δ' per resonant mode (indexed same as m/n) + - `delta_prime_col::Matrix{ComplexF64}` - Full Δ' column: shape (numpert_total × n_res_modes). + `delta_prime_col[j, i]` = (ca_r[j,ipert_res_i,2] - ca_l[j,ipert_res_i,2]) / (4π²·psio), + the coupling of mode j to resonant mode i through the singular layer. + The diagonal element `delta_prime_col[ipert_res_i, i]` equals `delta_prime[i]`. + Off-diagonal elements represent intra-surface mode coupling via the small asymptotic. + Only populated for the Riccati/parallel FM paths (not the standard path). """ @kwdef mutable struct SingType psifac::Float64 = 0.0 @@ -23,6 +30,8 @@ A mutable struct holding data related to the singular surfaces in the equilibriu q1::Float64 = 0.0 grri::Array{Float64,2} = Array{Float64}(undef, 0, 0) grre::Array{Float64,2} = Array{Float64}(undef, 0, 0) + delta_prime::Vector{ComplexF64} = ComplexF64[] + delta_prime_col::Matrix{ComplexF64} = Matrix{ComplexF64}(undef, 0, 0) end """ @@ -67,14 +76,46 @@ A struct representing a region of integration in the Euler-Lagrange solver. - `psi_end::Float64` - Ending ψ coordinate for this integration region - `needs_crossing::Bool` - Whether a rational surface crossing is needed after this chunk - `ising::Int` - Index of the singular surface associated with this chunk (0 if none) + - `direction::Int` - Integration direction: +1 forward (axis→edge), -1 backward (edge→axis). + For `direction=-1` chunks, `psi_start` < `psi_end` but integration proceeds from `psi_end` + toward `psi_start`. The resulting propagator maps state at `psi_end` → state at `psi_start`. + Used in bidirectional parallel FM to produce well-conditioned crossing-chunk propagators: + solutions that grow exponentially forward (toward a singularity) decay when integrated + backward, so the backward propagator is well-conditioned. """ @kwdef struct IntegrationChunk psi_start::Float64 psi_end::Float64 needs_crossing::Bool ising::Int = 0 + direction::Int = 1 # +1 forward, -1 backward end +""" + ChunkPropagator + +Fundamental matrix for one integration chunk, stored as two N×N×2 solution blocks. +Represents the propagator Φ(ψ₂,ψ₁) computed by integrating the EL ODE from two +identity-block initial conditions: + + - `block_upper_ic`: result of integrating with IC = (I_N, 0_N) (U₁ = I, U₂ = 0) + - `block_lower_ic`: result of integrating with IC = (0_N, I_N) (U₁ = 0, U₂ = I) + +Applying the propagator to the current state `u_prev`: + + u₁_new = block_upper_ic[:,:,1] · u₁_prev + block_lower_ic[:,:,1] · u₂_prev + u₂_new = block_upper_ic[:,:,2] · u₁_prev + block_lower_ic[:,:,2] · u₂_prev + +Since each chunk starts from a bounded identity IC (rather than the accumulated state), +exponential growth within a chunk does not affect the conditioning of the overall +assembly. This enables `Threads.@threads` parallel integration across all chunks. +""" +struct ChunkPropagator + block_upper_ic::Array{ComplexF64,3} # shape (N, N, 2) — result from IC = (I, 0) + block_lower_ic::Array{ComplexF64,3} # shape (N, N, 2) — result from IC = (0, I) +end +ChunkPropagator(N::Int) = ChunkPropagator(zeros(ComplexF64, N, N, 2), zeros(ComplexF64, N, N, 2)) + """ DebugSettings @@ -144,6 +185,15 @@ A mutable struct holding internal state variables for stability calculations. locstab::FastInterpolations.CubicSeriesInterpolant = cubic_interp(collect(0.0:0.25:1.0), zeros(5, 5); bc=NaturalBC()) debug_settings::DebugSettings = DebugSettings() wall_settings::Vacuum.WallShapeSettings = Vacuum.WallShapeSettings() + """ + Inter-surface tearing stability matrix of shape (2*msing × 2*msing). + delta_prime_matrix[2j-1, 2k-1] = small-asymptotic amplitude at left of surface j + when left of surface k is driven with unit amplitude. + Populated by `compute_delta_prime_matrix!` (parallel FM path only). + Uses bidirectional propagators (backward crossing chunks + forward intermediate chunks) + for a well-conditioned BVP, improving accuracy for large N (N ≳ 20). + """ + delta_prime_matrix::Matrix{ComplexF64} = Matrix{ComplexF64}(undef, 0, 0) end """ @@ -198,7 +248,6 @@ A mutable struct containing control parameters for stability analysis, set by th - `qlow::Float64` - Integration terminated at q limit determined by minimum of qlow and q0 from equil - `reform_eq_with_psilim::Bool` - Reform equilibrium with computed psilim (not yet implemented) - `psiedge::Float64` - If less then psilim, calculates dW(psi) between psiedge and psilim, then runs with truncation at max(dW) - - `parallel_threads::Int` - Number of parallel threads (not yet implemented) - `diagnose::Bool` - Enable diagnostic output (not yet implemented) - `diagnose_ca::Bool` - Enable asymptotic coefficient diagnostics (not yet implemented) - `write_outputs_to_HDF5::Bool` - Write results to HDF5 format @@ -206,6 +255,8 @@ A mutable struct containing control parameters for stability analysis, set by th - `force_wv_symmetry::Bool` - Boolean flag to enforce symmetry in the vacuum response matrix - `save_interval::Int` - Save every Nth ODE step (1=all, 10=every 10th). Always saves near rational surfaces. (Same as `euler_step` in the Fortran) - `force_termination::Bool` - Terminate after force-free states (skip perturbed equilibrium calculations) + - `use_riccati::Bool` - Use the dual Riccati reformulation S = U₁·U₂⁻¹ instead of the standard U₁/U₂ ODE. Reduces stiffness for faster integration. See Glasser (2018) Phys. Plasmas 25, 032507. + - `use_parallel::Bool` - Parallel fundamental matrix (propagator) integration using `Threads.@threads`. Each chunk is integrated independently from identity IC and assembled serially. Requires `singfac_min != 0`. Uses the same chunk bounds as the standard path but sub-divides chunks for load balancing. Crossings use the Riccati-style algorithm (no Gaussian reduction). """ @kwdef mutable struct ForceFreeStatesControl verbose::Bool = true @@ -253,7 +304,6 @@ A mutable struct containing control parameters for stability analysis, set by th qlow::Float64 = 0.0 reform_eq_with_psilim::Bool = false psiedge::Float64 = 1.0 - parallel_threads::Int = 1 diagnose::Bool = false diagnose_ca::Bool = false write_outputs_to_HDF5::Bool = true @@ -261,6 +311,8 @@ A mutable struct containing control parameters for stability analysis, set by th force_wv_symmetry::Bool = true save_interval::Int = 10 force_termination::Bool = false + use_riccati::Bool = false + use_parallel::Bool = false end @kwdef mutable struct FourFitVars{S<:CubicSeriesInterpolant,Opts<:NamedTuple} @@ -329,8 +381,8 @@ Populated in `Free.jl`. - `et::Vector{ComplexF64}` - Total eigenvalues of plasma + vacuum - `grri::Array{Float64, 2}` - Interior Green's function matrices (2 * mthvac * nzvac × 2 * numpert_total) - `grre::Array{Float64, 2}` - Exterior Green's function matrices (2 * mthvac * nzvac × 2 * numpert_total) - - `plasma_pts::Array{Float64, 3}` - Cartesian coordinates of plasma points [x, y, z] (mthvac * nzvac × 3) - - `wall_pts::Array{Float64, 3}` - Cartesian coordinates of wall points [x, y, z] (mthvac * nzvac × 3) + - `plasma_pts::Array{Float64, 3}` - Cartesian coordinates of plasma points, shape (mthvac * nzvac) × 3 for (x, y, z) + - `wall_pts::Array{Float64, 3}` - Cartesian coordinates of wall points, shape (mthvac * nzvac) × 3 for (x, y, z) """ @kwdef mutable struct VacuumData numpoints::Int diff --git a/src/ForceFreeStates/Riccati.jl b/src/ForceFreeStates/Riccati.jl new file mode 100644 index 00000000..eb987f58 --- /dev/null +++ b/src/ForceFreeStates/Riccati.jl @@ -0,0 +1,1026 @@ +""" + Riccati.jl - Dual Riccati reformulation of the Euler-Lagrange ODE + +Implements the dual Riccati matrix S = U₁ · U₂⁻¹ = P⁻¹, which satisfies a bounded +ODE even near singular surfaces where U₁, U₂ grow exponentially. This reduced stiffness +leads to fewer ODE integration steps and faster wall-clock time. + +Reference: Glasser (2018) Phys. Plasmas 25, 032507 — Eq. 19 (adapted for dual form S = P⁻¹) +where P = U₂ · U₁⁻¹ is the forward plasma response matrix. + +## Dual Riccati ODE + +Starting from the Euler-Lagrange system [Glasser 2016 eq. 24]: + dU₁/dψ = A·U₁ + B·U₂ A = -Q·F̄⁻¹·K̄, B = Q·F̄⁻¹·Q + dU₂/dψ = C·U₁ + D·U₂ C = Ḡ - K̄†·F̄⁻¹·K̄, D = K̄†·F̄⁻¹·Q + +with S = U₁·U₂⁻¹, differentiating gives the Riccati ODE: + dS/dψ = B + A·S - S·D - S·C·S + +Setting w = Q - K̄·S (shape N×N) and v = F̄⁻¹·w (Cholesky solve), this simplifies to: + dS/dψ = w†·v - S·Ḡ·S [Glasser 2018 eq. 19, dual form] + +## Integration Strategy + +### Why not integrate the Riccati ODE directly? + +`riccati_der!` evaluates the explicit Riccati RHS `dS/dψ = w†F̄⁻¹w − S·Ḡ·S` correctly, +but this ODE is **quadratic** in S. Near a rational surface, S grows large, so the quadratic +term `-SGS` dominates and the RHS grows as |S|². Explicit adaptive solvers (Tsit5) use +*relative* error control: they accept a step when |Δu|/|u| < reltol. When |S| is large, +the absolute error |ΔS| can be enormous while the relative error stays within tolerance. +The solver takes large steps through what is effectively a near-blowup — no amount of +step-size adaptation saves it because the problem is the error *metric*, not the step size. +An implicit solver could handle this stiffness, but is deferred. + +### Actual implementation: EL ODE + renormalization + +Instead we integrate the standard EL ODE (`sing_der!`) in the (U₁, U₂) variables and +recover S = U₁·U₂⁻¹ by renormalization. This achieves the same Riccati trajectory with +**no accuracy loss**: + +- `sing_der!` evaluates the exact EL RHS — no approximation. +- Tsit5 integrates (U₁, U₂) to **5th-order accuracy** with the adaptive step-size + controller enforcing the configured reltol at every accepted step. +- Renormalization `S = U₁·U₂⁻¹` is **exact** (a change of variables, not an approximation). +- The global error is the same as the standard EL path — controlled by the ODE solver + reltol, not by the renormalization frequency. + +This works because the EL ODE is **linear** in (U₁, U₂): the RHS does not grow with |S|, +so relative error control is faithful even when S is large. Renormalization triggered by +`renormalize_riccati_inplace!` in the callback (when max(|U₁|) or max(|U₂|) > ucrit) keeps +both matrices bounded, preventing overflow and maintaining a well-conditioned state for the +solver — exactly analogous to Gaussian reduction in the standard ODE. + +### Consistency with the Riccati ODE (local analysis) + +To verify the method is consistent with the Riccati ODE, consider a single step from (S, I): + + After one step: U₁_new = S + (A·S + B)·Δψ + O(Δψ²), U₂_new = I + (C·S + D)·Δψ + O(Δψ²) + Renorm: S_new = U₁_new · U₂_new⁻¹ = S + (B + A·S − S·D − S·C·S)·Δψ + O(Δψ²) ✓ + +The leading term matches the Riccati ODE exactly. This is a local consistency check only — +it does not imply the integration is first-order. In practice Tsit5 captures all higher-order +terms through its internal stages, achieving 5th-order global accuracy at the configured reltol. + +## Storage Convention + +During chunk integration (with sing_der! as ODE RHS): + u[:,:,1] = U₁ (starts as S_prev, evolves toward new S) + u[:,:,2] = U₂ (starts as I, evolves with EL dynamics) + +After renormalization (at crossing or when norms exceed ucrit): + u[:,:,1] = S = U₁ · U₂⁻¹ + u[:,:,2] = I + +This is compatible with downstream code (which uses U₁/U₂ ratio): + - Free.jl: wp = u[:,:,2] / u[:,:,1] = I · S⁻¹ = P ✓ (post-renorm) + - FixedBoundaryStability.jl: crit = min_eigval(u[:,:,1] / u[:,:,2]) = min_eigval(S) ✓ + - Axis init: S(ψ₀) = 0 (initialize_el_at_axis! sets u[:,:,1]=0, u[:,:,2]=I) ✓ + +## Key Differences from Standard Integration + +1. `sing_der!` is used as the ODE RHS (same as standard, NOT `riccati_der!`) +2. `riccati_integrator_callback!` replaces `integrator_callback!`: uses + `renormalize_riccati_inplace!` instead of Gaussian reduction +3. `riccati_cross_ideal_singular_surf!` replaces `cross_ideal_singular_surf!`: skips Gaussian + reduction and uses ipert_res directly for column zeroing, then renormalizes to (S_new, I) +4. `transform_u!` is skipped — S is already the true solution +""" + +""" + assemble_fm_matrix(propagators, idx_range) -> Matrix{ComplexF64} + +Assemble the 2N×2N fundamental matrix (propagator) by multiplying chunk propagators +in order for indices `idx_range`. Returns Φ_end * ... * Φ_start, so that the result +maps the IC at the start of `idx_range[1]` to the state at the end of `idx_range[end]`. + +Each `ChunkPropagator` stores the 2N columns of Φ split into two N×N×2 blocks: +``` + block_upper_ic[:,:,1:2] ↔ Φ[:,1:N] (result from IC=(I,0)) + block_lower_ic[:,:,1:2] ↔ Φ[:,N+1:2N] (result from IC=(0,I)) +``` +""" +function assemble_fm_matrix(propagators::Vector{ChunkPropagator}, idx_range) + N = size(propagators[1].block_upper_ic, 1) + Phi = Matrix{ComplexF64}(I, 2N, 2N) + isempty(idx_range) && return Phi + for i in idx_range + p = propagators[i] + Phi_i = [p.block_upper_ic[:,:,1] p.block_lower_ic[:,:,1]; + p.block_upper_ic[:,:,2] p.block_lower_ic[:,:,2]] + Phi = Phi_i * Phi + end + return Phi +end + +""" + compute_delta_prime_matrix!(intr, propagators, chunks) + +Compute the inter-surface tearing stability matrix (2·msing × 2·msing) using the +STRIDE global BVP formulation [Glasser 2018 Phys. Plasmas 25, 032501, Sec. III.B]. + +The BVP encodes the full plasma response with unknowns at each surface boundary: +``` + x_axis (N): free IC parameters at the axis (U₁ = 0 regular solutions) + x_left[j] (2N): state at left inner-layer boundary of surface j + x_right[j] (2N): state at right inner-layer boundary of surface j + x_edge (N): free IC parameters at the edge (conducting wall, U₁ = 0) + Total unknowns: nMat = (2 + 4·msing)·N +``` + +The BVP matrix M is assembled from segment propagators, inner-layer continuity +equations (non-resonant modes are continuous through each surface), and driving +terms (unit U₂[ipert_res] amplitude at each surface side). Each of the 2·msing +driving configurations is solved independently by LU back-substitution. + +## Well-conditioned BVP via bidirectional propagators + +For each inter-surface segment j (from `singR[j-1]` to `singL[j]`), the crossing chunk +(direction=-1) was integrated backward, giving a well-conditioned backward FM: +``` + Phi_L[j] = propagators[i_crossings[j]]: maps state at singL[j] → state at psi_m[j] + Phi_R[j] = product of forward propagators: maps state at singR[j-1] → state at psi_m[j] +``` +Continuity at the junction `psi_m[j]`: +``` + Phi_R[j] · x_right[j-1] = Phi_L[j] · x_left[j] + → Phi_R[j] · x_right[j-1] - Phi_L[j] · x_left[j] = 0 +``` +This replaces the ill-conditioned monolithic `Phi_segs[j] = Phi_L[j]⁻¹ · Phi_R[j]` +with a split formulation where each factor is well-conditioned. + +Element delta_prime_matrix[dRow, 2k-1] = U₂[ipert_k] component at the left side +of surface k when driving term dRow is active. dRow = 2j-1 (left of surface j) or +2j (right of surface j). This is the raw BVP coefficient; it differs from `delta_prime` +(which uses the asymptotic normalization from sing_get_ca). + +Only called from `parallel_eulerlagrange_integration` (requires FM propagators). +The result is stored in `intr.delta_prime_matrix`. + +## Limitations +- Assumes exactly one resonant mode per singular surface (standard single-n case). +- Uses a conducting wall edge BC (U₁ = 0). Vacuum BC is deferred. +""" +function compute_delta_prime_matrix!( + intr::ForceFreeStatesInternal, + propagators::Vector{ChunkPropagator}, + chunks::Vector{IntegrationChunk} +) + msing = intr.msing + msing == 0 && return + N = intr.numpert_total + + # Single-resonance assumption: each surface has exactly one resonant mode. + # Multi-resonance surfaces would require coupling all resonant modes simultaneously; + # only the first (sp.m[1], sp.n[1]) is used below. + @assert all(j -> length(intr.sing[j].m) == 1, 1:msing) "compute_delta_prime_matrix! only supports single-resonance surfaces" + + # Find the index of the crossing chunk for each surface (direction=-1 in bidirectional mode) + i_crossings = findall(c -> c.needs_crossing, chunks) + @assert length(i_crossings) == msing + + # Build Phi_L[j] (backward crossing chunk FM) and Phi_R[j] (product of forward + # chunks before the junction psi_m[j]) for each inter-surface segment j. + # + # Phi_L[j]: single backward chunk propagator at i_crossings[j] + # Maps state at psi_end (≈ singL[j]) → psi_start (= psi_m[j], away from singularity) + # Well-conditioned because growing EL solutions decay when integrated backward. + # + # Phi_R[j]: product of forward chunk propagators from singR[j-1] to psi_m[j] + # Maps state at singR[j-1] → psi_m[j] + # Phi_R[msing+1]: forward chunks from singR[msing] to edge (for edge BC) + Phi_L_mats = [assemble_fm_matrix(propagators, i_crossings[j]:i_crossings[j]) for j in 1:msing] + Phi_R_mats = Vector{Matrix{ComplexF64}}(undef, msing + 1) + Phi_R_mats[1] = assemble_fm_matrix(propagators, 1:i_crossings[1]-1) + for j in 2:msing + Phi_R_mats[j] = assemble_fm_matrix(propagators, i_crossings[j-1]+1:i_crossings[j]-1) + end + Phi_R_mats[msing+1] = assemble_fm_matrix(propagators, i_crossings[msing]+1:length(chunks)) + + # Resonant mode index (1:N) for each surface (single-resonance case) + ipert_all = [begin + sp = intr.sing[j] + idx = 1 + sp.m[1] - intr.mlow + (sp.n[1] - intr.nlow) * intr.mpert + @assert 1 <= idx <= N "Resonant mode index out of range" + idx + end for j in 1:msing] + + # BVP dimensions + nMat = (2 + 4 * msing) * N + s2 = 2 * msing + + # Column layout (1-indexed): + # x_axis: 1:N + # x_left[j]: N + 4N*(j-1)+1 : N + 4N*(j-1)+2N + # x_right[j]: N + 4N*(j-1)+2N+1 : N + 4N*j + # x_edge: N + 4N*msing+1 : nMat + col_axis = 1:N + col_left(j) = (N + 4N*(j-1)+1) : (N + 4N*(j-1)+2N) + col_right(j) = (N + 4N*(j-1)+2N+1) : (N + 4N*j) + col_edge = (N + 4N*msing+1) : nMat + + # Row layout: + # Axis-to-surface 1 junction: 1:2N (2N rows) + # For each surface j: + # Continuity: 2N + (4N-2)*(j-1)+1 : 2N + (4N-2)*(j-1)+(2N-2) (2N-2 rows) + # Junction/edge: 2N + (4N-2)*(j-1)+(2N-2)+1 : 2N + (4N-2)*j (2N rows) + # Driving terms: 2N + (4N-2)*msing+1 : nMat (2·msing rows) + row_drive_base = 2N + (4N-2)*msing + + M = zeros(ComplexF64, nMat, nMat) + + # Axis-to-surface 1 junction at psi_m[1]: + # Phi_R[1][:,N+1:2N]·x_axis = Phi_L[1]·x_left[1] + # → Phi_L[1]·x_left[1] - Phi_R[1][:,N+1:2N]·x_axis = 0 + # (Phi_R[1][:,N+1:2N] selects the N regular-solution columns from the axis IC U₂=I) + M[1:2N, col_left(1)] .= Phi_L_mats[1] + M[1:2N, col_axis] .= -view(Phi_R_mats[1], :, N+1:2N) + + for j in 1:msing + ipert_j = ipert_all[j] + + # Continuity at surface j: x_left[j][i] = x_right[j][i] for non-resonant i + # (skip i = ipert_j and i = ipert_j+N, the two resonant-mode rows) + row_cont = 2N + (4N-2)*(j-1) + for i in 1:2N + if i != ipert_j && i != ipert_j + N + row_cont += 1 + M[row_cont, col_left(j)[i]] = 1 + M[row_cont, col_right(j)[i]] = -1 + end + end + + # Junction / edge matching (2N rows starting at row_cont+1) + junc_rows = (row_cont+1) : (2N + (4N-2)*j) + if j < msing + # Junction at psi_m[j+1]: + # Phi_R[j+1]·x_right[j] = Phi_L[j+1]·x_left[j+1] + # → Phi_R[j+1]·x_right[j] - Phi_L[j+1]·x_left[j+1] = 0 + M[junc_rows, col_right(j)] .= Phi_R_mats[j+1] + M[junc_rows, col_left(j+1)] .= -Phi_L_mats[j+1] + else + # Conducting wall: Phi_R[msing+1]·x_right[msing] = [0; I_N]·x_edge + # Upper N rows: U₁ = 0 (no x_edge contribution) + # Lower N rows: U₂ = x_edge (contribution from -I·x_edge) + # (Phi_R[msing+1] is all forward chunks → same as old Phi_segs[msing+1]) + M[junc_rows, col_right(msing)] .= Phi_R_mats[msing+1] + M[junc_rows[N+1:end], col_edge] .= -I(N) + end + + # Driving terms: unit U₂[ipert_j] amplitude at left and right of surface j + M[row_drive_base + 2j-1, col_left(j)[ipert_j+N]] = 1 + M[row_drive_base + 2j, col_right(j)[ipert_j+N]] = 1 + end + + M_lu = lu(M) + delta_mat = zeros(ComplexF64, s2, s2) + b = zeros(ComplexF64, nMat) + + for jsing in 1:msing + for side in 1:2 # side=1: left drive; side=2: right drive + dRow = 2jsing - (2 - side) # 2j-1 for left, 2j for right + fill!(b, 0) + b[row_drive_base + dRow] = 1 + x = M_lu \ b + + for ksing in 1:msing + ipert_k = ipert_all[ksing] + # Extract U₂[ipert_k] at left and right boundaries of surface ksing + delta_mat[dRow, 2ksing-1] = x[col_left(ksing)[ipert_k+N]] + delta_mat[dRow, 2ksing] = x[col_right(ksing)[ipert_k+N]] + end + end + end + + intr.delta_prime_matrix = delta_mat +end + +""" + riccati_der!(du, u, params, psieval) + +Evaluate the explicit dual Riccati ODE right-hand side: + dS/dψ = w†·F̄⁻¹·w - S·Ḡ·S, w = Q - K̄·S + +where Q = diag(1/(m - n·q)) is the diagonal singular factor matrix. +The identity slice u[:,:,2] = I does not evolve (du[:,:,2] = 0). + +**NOTE**: This function is NOT used as the ODE RHS in `riccati_integrate_chunk!`. +The explicit Riccati ODE is numerically unstable for explicit solvers: the quadratic +term S·Ḡ·S causes finite-time blowup when K̄·S >> Q. Instead, `sing_der!` is used +with periodic renormalization via `renormalize_riccati_inplace!`. This function is +retained for reference and potential use with implicit solvers. + +See: Glasser (2018) Phys. Plasmas 25, 032507 — Eq. 19 (dual Riccati form) +""" +@with_pool pool function riccati_der!( + du::Array{ComplexF64,3}, + u::Array{ComplexF64,3}, + params::Tuple{ForceFreeStatesControl,Equilibrium.PlasmaEquilibrium, + FourFitVars,ForceFreeStatesInternal,OdeState,IntegrationChunk}, + psieval::Float64 +) + + _, equil, ffit, intr, odet, _ = params + + Npert = intr.numpert_total + S = @view u[:, :, 1] + dS = @view du[:, :, 1] + @view(du[:, :, 2]) .= 0 # identity does not evolve + + # Compute singfac = 1/(m - n·q) as column vector Q = diag(singfac_vec) + # [Glasser 2016 eq. 24] + singfac_vec = acquire!(pool, Float64, Npert) + singfac_mat = reshape(singfac_vec, intr.mpert, intr.npert) + odet.q = equil.profiles.q_spline(psieval; hint=odet.spline_hint) + singfac_mat .= 1.0 ./ ((intr.mlow:intr.mhigh) .- odet.q .* (intr.nlow:intr.nhigh)') + + # Allocate temporaries from pool + fmat_lower = acquire!(pool, ComplexF64, Npert, Npert) + kmat = similar!(pool, fmat_lower) + gmat = similar!(pool, fmat_lower) + w = similar!(pool, fmat_lower) # w = Q - K̄·S + v = similar!(pool, fmat_lower) # v = F̄⁻¹·w (then reused for S·Ḡ·S) + tmp = similar!(pool, fmat_lower) # scratch + + # Evaluate F̄ (Cholesky factor), K̄, Ḡ splines at current ψ + ffit.fmats_lower(vec(fmat_lower), psieval; hint=ffit._hint) + ffit.kmats(vec(kmat), psieval; hint=ffit._hint) + ffit.gmats(vec(gmat), psieval; hint=ffit._hint) + + # w = Q - K̄·S: w[i,j] = singfac_vec[i]·δ_ij - (K̄·S)[i,j] + # Q is DIAGONAL (singfac_vec[i] only on i==j), so we cannot broadcast singfac_vec + # over all columns — that would give the wrong off-diagonal values. + mul!(w, kmat, S) # w = K̄·S + @. w = -w # w = -K̄·S + for i in 1:Npert + @inbounds w[i, i] += singfac_vec[i] # add diagonal Q: w = Q - K̄·S + end + + # v = F̄⁻¹·w (in-place Cholesky solve with stored lower-triangular factor) + v .= w + ldiv!(LowerTriangular(fmat_lower), v) + ldiv!(UpperTriangular(fmat_lower'), v) + + # dS = w†·v - S·Ḡ·S [Glasser 2018 eq. 19, dual Riccati] + mul!(dS, adjoint(w), v) # dS = w†·v + + # Store du1/dψ = Q·v for ud diagnostic before v is reused + # Q·v = diag(singfac_vec)·v = Ξ'_Ψ (displacement gradient, with U₂ = I) + @. odet.ud[:, :, 1] = singfac_vec * v + @view(odet.ud[:, :, 2]) .= 0 + + # Subtract S·Ḡ·S (reuse v and tmp to avoid extra allocation) + mul!(tmp, gmat, S) # tmp = Ḡ·S + mul!(v, S, tmp) # v = S·Ḡ·S + dS .-= v +end + +""" + riccati_integrator_callback!(integrator) + +Callback function for the Riccati ODE integrator. Handles tolerance updates, +renormalization, and storage at each step. + +Uses `sing_der!` as the ODE RHS: u[:,:,1] = U₁ (starts as S), u[:,:,2] = U₂ (starts as I). +When max(|U₁|) or max(|U₂|) exceeds `ctrl.ucrit`, applies `renormalize_riccati_inplace!` +to compute S = U₁·U₂⁻¹ and reset U₂ = I. This is the Riccati analogue of Gaussian +reduction in the standard `integrator_callback!`, and keeps the ODE inputs bounded. +""" +function riccati_integrator_callback!(integrator) + + ctrl, _, _, intr, odet, chunk = integrator.p + + # Update integration tolerances (same logic as integrator_callback!) + integrator.opts.reltol = compute_tols(ctrl, intr, odet, chunk.ising) + + # Renormalize when norms exceed ucrit (analogous to Gaussian reduction in integrator_callback!) + # During sing_der! integration: u[:,:,1]=U₁ (grows), u[:,:,2]=U₂ (grows). + # Renorm computes S = U₁·U₂⁻¹ and resets U₂ = I, keeping inputs bounded. + if maximum(abs, @view(integrator.u[:, :, 1])) > ctrl.ucrit || + maximum(abs, @view(integrator.u[:, :, 2])) > ctrl.ucrit + renormalize_riccati_inplace!(integrator.u, intr.numpert_total) + end + + # Determine if we should save this step + psi_range = abs(integrator.sol.prob.tspan[2] - integrator.sol.prob.tspan[1]) + psi_remaining = abs(integrator.sol.prob.tspan[2] - integrator.t) + near_end = psi_remaining < 0.05 * psi_range || psi_remaining < 1e-4 + steps_in_segment = length(integrator.sol.t) + near_start = steps_in_segment <= 2 + should_save = near_start || near_end || (odet.step % ctrl.save_interval == 0) + + if should_save + if odet.step >= size(odet.u_store, 4) + resize_storage!(odet) + end + odet.psi_store[odet.step] = integrator.t + @views odet.u_store[:, :, :, odet.step] .= integrator.u + odet.q_store[odet.step] = odet.q + @views odet.ud_store[:, :, :, odet.step] .= odet.ud + odet.step += 1 + end +end + +""" + riccati_integrate_chunk!(odet, ctrl, equil, ffit, intr, chunk) + +Integrate the dual Riccati ODE from `chunk.psi_start` to `chunk.psi_end`. + +Uses `sing_der!` as the ODE RHS with `riccati_integrator_callback!`, which applies +`renormalize_riccati_inplace!` (instead of Gaussian reduction) when norms exceed ucrit. +Starting state: u[:,:,1] = S_prev, u[:,:,2] = I (set by initialization or previous renorm). +Ending state: u[:,:,1] = U₁, u[:,:,2] = U₂ (ratio S = U₁·U₂⁻¹ is the updated Riccati matrix). +""" +function riccati_integrate_chunk!( + odet::OdeState, ctrl::ForceFreeStatesControl, equil::Equilibrium.PlasmaEquilibrium, + ffit::FourFitVars, intr::ForceFreeStatesInternal, chunk::IntegrationChunk +) + cb = DiscreteCallback((u, t, integrator) -> true, riccati_integrator_callback!) + rtol = compute_tols(ctrl, intr, odet, chunk.ising) + prob = ODEProblem(sing_der!, odet.u, (chunk.psi_start, chunk.psi_end), + (ctrl, equil, ffit, intr, odet, chunk)) + sol = solve(prob, BS5(); reltol=rtol, callback=cb, save_everystep=false, save_end=true) + odet.u .= sol.u[end] + odet.psifac = sol.t[end] + # Renormalize end state to (S, I) convention for the next chunk. + # When a crossing follows (needs_crossing=true), skip renorm so that ca_l is computed + # from the bounded (U₁, U₂) state in riccati_cross_ideal_singular_surf!: this gives + # consistent normalization with ca_r (also from pre-renorm state), enabling correct Δ'. + # The callback guarantees max(|U₁|), max(|U₂|) ≤ ucrit, so the state is bounded. + if !chunk.needs_crossing + renormalize_riccati_inplace!(odet.u, intr.numpert_total) + end +end + +""" + renormalize_riccati!(odet, intr) + +After a singular surface crossing, restore the canonical Riccati storage convention: + u[:,:,1] = S_new = U₁_new · U₂_new⁻¹ + u[:,:,2] = I + +`riccati_cross_ideal_singular_surf!` leaves u[:,:,1] = U₁_new and u[:,:,2] = U₂_new (not I), +so this step is required before continuing the Riccati integration. + +The u_store entry from the crossing correctly has U₁_new and U₂_new (stored before this call), +so `compute_smallest_eigenvalue` still computes U₁_new/U₂_new = S_new correctly. +""" +function renormalize_riccati!(odet::OdeState, intr::ForceFreeStatesInternal) + N = intr.numpert_total + # S_new = U₁_new · U₂_new⁻¹ (in-place to avoid allocation) + U2_copy = copy(@view odet.u[:, :, 2]) + rdiv!(@view(odet.u[:, :, 1]), lu!(U2_copy)) + # Reset U₂ = I + fill!(@view(odet.u[:, :, 2]), 0) + for i in 1:N + odet.u[i, i, 2] = 1 + end +end + +""" + renormalize_riccati_inplace!(u, N) + +In-place Riccati renormalization on an arbitrary N×N×2 array: + u[:,:,1] = U₁ · U₂⁻¹ (new S) + u[:,:,2] = I + +Used in `riccati_integrator_callback!` to renormalize the integrator's live state +when column norms grow beyond `ctrl.ucrit`, analogous to Gaussian reduction in the +standard ODE. This keeps the inputs to `sing_der!` bounded, preventing the same +exponential growth that occurs in the standard (non-Riccati) ODE without Gaussian reduction. +""" +function renormalize_riccati_inplace!(u::Array{ComplexF64,3}, N::Int) + U2_copy = copy(@view u[:, :, 2]) + rdiv!(@view(u[:, :, 1]), lu!(U2_copy)) + fill!(@view(u[:, :, 2]), 0) + for i in 1:N + u[i, i, 2] = 1 + end +end + +""" + riccati_cross_ideal_singular_surf!(odet, ctrl, equil, ffit, intr, ising) + +Cross a singular surface for the Riccati formulation. Replaces `cross_ideal_singular_surf!` +for the Riccati integration path with two key differences: + +1. **No Gaussian reduction**: `cross_ideal_singular_surf!` calls `compute_solution_norms!` + which applies Gaussian reduction to (S, I). This divides by pivot elements of S, which + can be near-zero (S = 0 at axis and grows slowly), producing NaN/Inf in U₂. For Riccati, + S is bounded so Gaussian reduction is unnecessary. + +2. **Direct column zeroing**: Instead of using the GR-sorted `odet.index` to identify the + column to zero, we use `ipert_res` directly (the resonant mode index). This is valid since + without GR there is no permutation applied to the columns of S. + +**Δ' normalization**: This function expects `odet.u` in the bounded (U₁, U₂) form produced by +`riccati_integrate_chunk!` with `needs_crossing=true` (final renorm skipped). ca_l is computed +from (U₁, U₂) before the crossing, and ca_r from (U₁_new, U₂_new) before `renormalize_riccati!`. +Since column `ipert_res` of [U₁_new; U₂_new] equals the introduced asymptotic solution exactly, +ca_r[ipert_res,ipert_res,2] = 1 regardless of other column normalizations. This gives a +physically meaningful Δ' = ca_r - ca_l with consistent left/right normalization. + +After the predictor step and asymptotic introduction, `renormalize_riccati!` is called +to restore the canonical (S_new, I) form before continuing integration. + +The u_store entry at the crossing step correctly stores (U₁_new, U₂_new) so that +`evaluate_stability_criterion!` can compute U₁_new / U₂_new = S_new correctly. +""" +function riccati_cross_ideal_singular_surf!( + odet::OdeState, ctrl::ForceFreeStatesControl, equil::Equilibrium.PlasmaEquilibrium, + ffit::FourFitVars, intr::ForceFreeStatesInternal, ising::Int +) + # Skip Gaussian reduction — S is bounded so no large-norm columns exist + + singp = intr.sing[ising] + sing_asymp = compute_sing_asymptotics(singp, ctrl, equil, ffit, intr) + dpsi = singp.psifac - odet.psifac # ψ_res - ψ_current (positive) + + # Get asymptotic coefficients before crossing + ua = sing_get_ua(sing_asymp, -dpsi) + odet.ca_l[:, :, :, ising] .= sing_get_ca(odet.u, ua, intr) + + # Resonant perturbation indices (same formula as in cross_ideal_singular_surf!) + ipert_res = 1 .+ singp.m .- intr.mlow .+ (singp.n .- intr.nlow) .* intr.mpert + + if !ctrl.con_flag + # Zero the resonant column of (S, I) using ipert_res directly (no GR sorting needed). + # The zeroed column stays zero through the predictor step since both slices are zero. + for i in eachindex(sing_asymp.r1) + odet.u[:, ipert_res[i], :] .= 0 + end + end + + # Predictor: approximate solution on the other side of the singular surface. + # sing_der! works on any (U1, U2) state — the zeroed column remains zero since + # du1[:, ipert_res] = 0 and du2[:, ipert_res] = 0 when u[:, ipert_res, :] = 0. + params = (ctrl, equil, ffit, intr, odet, IntegrationChunk(0.0, 0.0, false, ising, 1)) + du1 = zeros(ComplexF64, intr.numpert_total, intr.numpert_total, 2) + du2 = zeros(ComplexF64, intr.numpert_total, intr.numpert_total, 2) + sing_der!(du1, odet.u, params, odet.psifac) + odet.psifac += 2 * dpsi # jump to other side of singular surface + sing_der!(du2, odet.u, params, odet.psifac) + odet.u .+= (du1 .+ du2) .* dpsi + + # Apply asymptotic solution on other side of singular surface + ua = sing_get_ua(sing_asymp, dpsi) + if !ctrl.con_flag + for i in eachindex(sing_asymp.r1) + # Zero the resonant row (removes large components at the resonant mode) + odet.u[ipert_res[i], :, :] .= 0 + # Introduce the small asymptotic resonant solution in the zeroed column. + # ua[:, ipert_res[i]+numpert_total, :] is the "lower" (small) solution for mode ipert_res[i]. + # After this, u[:,:,2] = U₂_new ≠ I (has asymptotic in column ipert_res[i]); + # renormalize_riccati! will compute S_new = U₁_new · U₂_new⁻¹ and reset U₂ = I. + odet.u[:, ipert_res[i], :] .= ua[:, ipert_res[i]+intr.numpert_total, :] + end + end + # Compute ca_r from (U₁_new, U₂_new) before renormalization. + # Column ipert_res of [U₁_new; U₂_new] = ua[:,ipert_res+N,:] (the introduced small asymptotic), + # so ca_r[:,ipert_res] = e_{ipert_res+N} and ca_r[ipert_res,ipert_res,2] = 1 regardless of + # the normalization of the other columns. This gives Δ' = 1 - ca_l[ipert_res,ipert_res,2]. + odet.ca_r[:, :, :, ising] .= sing_get_ca(odet.u, ua, intr) + + # Compute Δ' using ipert_res directly (no GR → perm_col = ipert_res, ca_r diagonal = 1). + # Also compute the full column Δ' (all N modes) for the off-diagonal coupling. + if !ctrl.con_flag + denom = (2π)^2 * equil.psio + n_res = length(sing_asymp.r1) + N = intr.numpert_total + resize!(intr.sing[ising].delta_prime, n_res) + intr.sing[ising].delta_prime_col = zeros(ComplexF64, N, n_res) + for i in eachindex(sing_asymp.r1) + Δca_col = (odet.ca_r[:, ipert_res[i], 2, ising] - odet.ca_l[:, ipert_res[i], 2, ising]) / denom + intr.sing[ising].delta_prime_col[:, i] .= Δca_col + intr.sing[ising].delta_prime[i] = Δca_col[ipert_res[i]] + end + end + + # Store (U₁_new, U₂_new) before renormalization so evaluate_stability_criterion! + # can recover S_new = U₁_new / U₂_new correctly via compute_smallest_eigenvalue + odet.psi_store[odet.step] = odet.psifac + odet.q_store[odet.step] = odet.q + odet.u_store[:, :, :, odet.step] = odet.u + odet.ud_store[:, :, :, odet.step] = odet.ud + odet.step += 1 + + # Renormalize to Riccati convention: S_new = U₁_new · U₂_new⁻¹, reset U₂ = I + renormalize_riccati!(odet, intr) +end + +""" + riccati_eulerlagrange_integration(ctrl, equil, ffit, intr) -> OdeState + +Main driver for integrating the dual Riccati ODE across the plasma. +Functionally identical to `eulerlagrange_integration` except: + +1. Uses `riccati_integrate_chunk!`: drives `sing_der!` with `riccati_integrator_callback!` + which applies `renormalize_riccati_inplace!` (instead of Gaussian reduction) when + column norms exceed ucrit +2. Uses `riccati_cross_ideal_singular_surf!` instead of `cross_ideal_singular_surf!`: + skips Gaussian reduction (avoids near-zero pivot issues when S is small near axis) + and renormalizes to (S_new, I) in one step +3. Skips `transform_u!` — S is already the true solution, no Gaussian-reduction undo needed + +Enable via `use_riccati = true` in `[ForceFreeStates]` section of jpec.toml, or by +setting `ctrl.use_riccati = true` programmatically. +""" +function riccati_eulerlagrange_integration( + ctrl::ForceFreeStatesControl, equil::Equilibrium.PlasmaEquilibrium, + ffit::FourFitVars, intr::ForceFreeStatesInternal +) + # Initialization — same as eulerlagrange_integration + odet = OdeState(intr.numpert_total, ctrl.numsteps_init, ctrl.numunorms_init, intr.msing) + if ctrl.sing_start <= 0 + initialize_el_at_axis!(odet, ctrl, equil.profiles, intr) + # axis init sets u[:,:,1]=0, u[:,:,2]=I → S=0 at axis ✓ + elseif ctrl.sing_start <= intr.msing + error("sing_start > 0 not implemented yet!") + else + error("Invalid value for sing_start: $(ctrl.sing_start) > msing = $(intr.msing)") + end + + chunks = chunk_el_integration_bounds(odet, ctrl, intr) + + # Prime odet.new = false so that compute_solution_norms! (if called elsewhere) + # does not skip Gaussian reduction on first invocation. Also initialize unorm0 + # to safe defaults since the Riccati callback never calls compute_solution_norms!. + odet.new = false + fill!(odet.unorm0, 1.0) + + if ctrl.verbose + @info " ψ = $((@sprintf "%.3f" odet.psifac)), q = $((@sprintf "%.3f" equil.profiles.q_spline(odet.psifac)))" + end + + for chunk in chunks + # Integrate this chunk using the Riccati ODE (Riccati callback skips Gaussian reduction) + riccati_integrate_chunk!(odet, ctrl, equil, ffit, intr, chunk) + if ctrl.verbose + @info " ψ = $((@sprintf "%.3f" odet.psifac)), q= $((@sprintf "%.3f" odet.q)), max(S) = $((@sprintf "%.2e" maximum(abs, odet.u[:,:,1]))), steps = $(odet.step-1)" + end + + # Cross rational surface (Riccati crossing skips GR, uses ipert_res directly) + if chunk.needs_crossing + if ctrl.kin_flag + error("kin_flag = true not implemented yet!") + else + riccati_cross_ideal_singular_surf!(odet, ctrl, equil, ffit, intr, chunk.ising) + # renormalize_riccati! is called inside riccati_cross_ideal_singular_surf! + end + end + end + + # Find peak dW in edge region if applicable (uses free_compute_total which reads wp = I/S = P) + if ctrl.psiedge < intr.psilim + odet.step = findmax_dW_edge!(odet, ctrl, equil, ffit, intr) + trim_storage!(odet) + if ctrl.verbose + @info "Truncating integration at peak edge dW: ψ = $((@sprintf "%.2f" odet.psi_store[odet.step])), q = $((@sprintf "%.2f" odet.q_store[odet.step]))" + end + intr.psilim = odet.psi_store[end] + intr.qlim = odet.q_store[end] + odet.u .= odet.u_store[:, :, :, end] + else + odet.step -= 1 + trim_storage!(odet) + end + + # Evaluate fixed-boundary stability criterion + if ctrl.verbose + @info "Evaluating fixed-boundary stability criterion" + end + odet.nzero = evaluate_stability_criterion!(odet, equil.profiles) + + # Note: transform_u! is intentionally skipped. + # S is already the true solution (invariant under Gaussian reduction), + # and u_store entries have u[:,:,1]=S, u[:,:,2]=I throughout integration. + # At crossing steps, u_store has U₁_new/U₂_new which compute_smallest_eigenvalue + # correctly resolves to S_new via rdiv. No transformation is needed. + + return odet +end + +""" + integrate_propagator_chunk!(prop, chunk, ctrl, equil, ffit, intr, odet_proxy) + +Compute the fundamental matrix (propagator) for one integration chunk by solving the +EL ODE twice from identity-block initial conditions. + +The first solve uses IC = (I_N, 0_N) (U₁=I, U₂=0) and stores the result in +`prop.block_upper_ic`. The second uses IC = (0_N, I_N) (U₁=0, U₂=I) and stores +the result in `prop.block_lower_ic`. + +`odet_proxy` is a per-thread lightweight `OdeState` used to provide thread-local +storage for `sing_der!` side effects (`q`, `ud`, `spline_hint`). Multiple threads +may call this function concurrently using distinct `odet_proxy` objects. + +No callback is used: the propagator integration proceeds without normalization or +storage steps, since the identity ICs ensure bounded solutions within each chunk. +""" +function integrate_propagator_chunk!( + prop::ChunkPropagator, + chunk::IntegrationChunk, + ctrl::ForceFreeStatesControl, + equil::Equilibrium.PlasmaEquilibrium, + ffit::FourFitVars, + intr::ForceFreeStatesInternal, + odet_proxy::OdeState +) + N = intr.numpert_total + # Reverse tspan for backward chunks (direction=-1): OrdinaryDiffEq handles negative tspan + # naturally. The resulting propagator maps state at psi_end → psi_start, which is + # well-conditioned because exponentially growing solutions (forward) decay backward. + tspan = chunk.direction == 1 ? + (chunk.psi_start, chunk.psi_end) : + (chunk.psi_end, chunk.psi_start) + rtol = chunk.ising > 0 ? ctrl.tol_r : ctrl.tol_nr + params = (ctrl, equil, ffit, intr, odet_proxy, chunk) + + # Upper block IC: U₁ = I, U₂ = 0 + u_upper = zeros(ComplexF64, N, N, 2) + for i in 1:N + u_upper[i, i, 1] = 1 + end + odet_proxy.spline_hint[] = 1 + prob = ODEProblem(sing_der!, u_upper, tspan, params) + sol = solve(prob, BS5(); reltol=rtol, save_everystep=false, save_end=true) + prop.block_upper_ic .= sol.u[end] + + # Lower block IC: U₁ = 0, U₂ = I + u_lower = zeros(ComplexF64, N, N, 2) + for i in 1:N + u_lower[i, i, 2] = 1 + end + odet_proxy.spline_hint[] = 1 + prob = ODEProblem(sing_der!, u_lower, tspan, params) + sol = solve(prob, BS5(); reltol=rtol, save_everystep=false, save_end=true) + prop.block_lower_ic .= sol.u[end] +end + +""" + apply_propagator!(odet, prop) + +Apply the chunk propagator `prop` to the current state `odet.u` in-place. + +The propagator acts as a linear map on the (U₁, U₂) pair: + + U₁_new = block_upper_ic[:,:,1] · U₁_prev + block_lower_ic[:,:,1] · U₂_prev + U₂_new = block_upper_ic[:,:,2] · U₁_prev + block_lower_ic[:,:,2] · U₂_prev + +This correctly propagates any state (not just the identity), including the +(S, I) form produced by Riccati-style crossings. +""" +function apply_propagator!(odet::OdeState, prop::ChunkPropagator) + U1_upper = @view prop.block_upper_ic[:, :, 1] + U2_upper = @view prop.block_upper_ic[:, :, 2] + U1_lower = @view prop.block_lower_ic[:, :, 1] + U2_lower = @view prop.block_lower_ic[:, :, 2] + + u1_prev = copy(@view odet.u[:, :, 1]) + u2_prev = copy(@view odet.u[:, :, 2]) + tmp = similar(u1_prev) + + # U₁_new = U1_upper · u1_prev + U1_lower · u2_prev + mul!(view(odet.u, :, :, 1), U1_upper, u1_prev) + mul!(tmp, U1_lower, u2_prev) + odet.u[:, :, 1] .+= tmp + + # U₂_new = U2_upper · u1_prev + U2_lower · u2_prev + mul!(view(odet.u, :, :, 2), U2_upper, u1_prev) + mul!(tmp, U2_lower, u2_prev) + odet.u[:, :, 2] .+= tmp +end + +""" + apply_propagator_inverse!(odet, prop) + +Apply the *inverse* of the chunk propagator `prop` to the current state `odet.u` in-place. + +Used for backward chunks (direction=-1): the stored propagator Φ_bwd maps state at +`psi_end` → state at `psi_start` (well-conditioned because solutions that grow +exponentially forward decay backward). To advance the Riccati state from `psi_start` +to `psi_end`, we solve Φ_bwd · x = u_old, which gives x = Φ_bwd⁻¹ · u_old = Φ_fwd · u_old. + +Since Φ_bwd is well-conditioned, the LU solve is accurate, giving the same result as +applying the (ill-conditioned) forward propagator Φ_fwd but with far better precision. +""" +function apply_propagator_inverse!(odet::OdeState, prop::ChunkPropagator) + N = size(odet.u, 1) + # Assemble 2N×2N backward FM Φ_bwd + Φ = [prop.block_upper_ic[:,:,1] prop.block_lower_ic[:,:,1]; + prop.block_upper_ic[:,:,2] prop.block_lower_ic[:,:,2]] + # Φ_bwd maps state at psi_end → psi_start (well-conditioned). + # We want Φ_fwd = Φ_bwd⁻¹ to advance state from psi_start → psi_end. + # Solving Φ_bwd · x = [U₁_old; U₂_old] gives x = Φ_bwd⁻¹ · [U₁_old; U₂_old]. + u_old = [odet.u[:,:,1]; odet.u[:,:,2]] # 2N × N + u_new = Φ \ u_old # LU solve, 2N × N + odet.u[:,:,1] .= u_new[1:N, :] + odet.u[:,:,2] .= u_new[N+1:2N, :] +end + +""" + parallel_eulerlagrange_integration(ctrl, equil, ffit, intr) -> OdeState + +Parallel fundamental matrix (propagator) driver for the EL integration. + +Functionally equivalent to `eulerlagrange_integration`, integrating all bulk chunks +concurrently using `Threads.@threads`, then re-integrating the outer plasma serially: + +1. **Chunk generation**: calls `chunk_el_integration_bounds`, then `balance_integration_chunks` + to sub-divide chunks for load-balanced parallel execution. +2. **Parallel phase**: `integrate_propagator_chunk!` integrates each chunk independently + from identity initial conditions (no accumulated state, no normalization/callback). + Each thread uses a private `OdeState` proxy for `sing_der!` side effects. +3. **Serial assembly**: propagators are applied sequentially with `apply_propagator!`. + Rational surface crossings use `riccati_cross_ideal_singular_surf!` (no Gaussian + reduction) matching the Riccati path convention. +4. **Outer plasma re-integration**: after the last rational surface crossing, the outer + plasma (from last ψ_s to psilim) is re-integrated using `riccati_integrate_chunk!`. + FM propagation in this region is prone to precision loss for high N (exponential growth + without renormalization); Riccati integration keeps matrices bounded and provides dense + checkpoints for `findmax_dW_edge!`. + +Enable via `use_parallel = true` in `[ForceFreeStates]` of jpec.toml, or by setting +`ctrl.use_parallel = true` programmatically. Requires `singfac_min != 0`. + +**Key differences from standard integration:** +- No Gaussian reduction (crossings use riccati-style, odet.ifix stays 0) +- `transform_u!` is called but is a no-op (identity transform, ifix=0) +- `ud_store` is approximate (set to zeros for FM chunks; does not affect energies or Δ') +- Outer plasma uses serial Riccati integration for numerical stability + +**Bidirectional integration for large-N accuracy:** +The crossing chunk (nearest to each rational surface singL[j]) is integrated *backward* +(`direction=-1`, `tspan` reversed). Backward integration of a region where solutions grow +exponentially forward causes them to *decay*, so the resulting backward FM Φ_bwd is +well-conditioned. The accurate forward propagation is recovered as Φ_bwd⁻¹ via a stable +LU solve in `apply_propagator_inverse!`. This follows the same principle as STRIDE +(Glasser 2018 Phys. Plasmas 25, 032501). The all-forward path had ~10% energy error for +the DIIID-like example (N=26, n=1); bidirectional reduces this to within 2%. +""" +function parallel_eulerlagrange_integration( + ctrl::ForceFreeStatesControl, equil::Equilibrium.PlasmaEquilibrium, + ffit::FourFitVars, intr::ForceFreeStatesInternal +) + # Initialization — same as eulerlagrange_integration + odet = OdeState(intr.numpert_total, ctrl.numsteps_init, ctrl.numunorms_init, intr.msing) + if ctrl.sing_start <= 0 + initialize_el_at_axis!(odet, ctrl, equil.profiles, intr) + elseif ctrl.sing_start <= intr.msing + error("sing_start > 0 not implemented yet!") + else + error("Invalid value for sing_start: $(ctrl.sing_start) > msing = $(intr.msing)") + end + + # Prime odet.new = false (consistent with riccati path — no Gaussian reduction used) + odet.new = false + fill!(odet.unorm0, 1.0) + + # Build chunks and sub-divide for load-balanced parallel execution. + # bidirectional=true: crossing chunks (nearest to each rational surface) are assigned + # direction=-1, so they are integrated backward. The resulting backward propagator + # Φ_bwd is well-conditioned because growing EL solutions decay backward. The forward + # propagation is recovered as Φ_bwd⁻¹ via LU solve in apply_propagator_inverse!. + base_chunks = chunk_el_integration_bounds(odet, ctrl, intr; bidirectional=true) + chunks = balance_integration_chunks(base_chunks, ctrl, intr) + + N = intr.numpert_total + propagators = [ChunkPropagator(N) for _ in chunks] + + # Per-thread lightweight proxy OdeState for sing_der! side effects + nthreads = Threads.nthreads() + odet_proxies = [OdeState(N, 1, 1, 0) for _ in 1:nthreads] + + if ctrl.verbose + @info " ψ = $((@sprintf "%.3f" odet.psifac)), q = $((@sprintf "%.3f" equil.profiles.q_spline(odet.psifac)))" + @info " Parallel FM: $(length(chunks)) chunks, $nthreads threads" + end + + # PARALLEL phase: integrate all chunks independently from identity IC. + # :static scheduler pins each task to one OS thread for its lifetime, so + # Threads.threadid() returns a stable index into odet_proxies. + # Without :static, Julia's task scheduler can migrate tasks between threads, + # making threadid() unreliable (Julia 1.7+). + Threads.@threads :static for i in eachindex(chunks) + integrate_propagator_chunk!(propagators[i], chunks[i], ctrl, equil, ffit, intr, + odet_proxies[Threads.threadid()]) + end + + # SERIAL assembly: apply propagators and handle crossings in order. + # After each apply_propagator!, renormalize to (S, I) form. This is the Julia + # equivalent of STRIDE's ode_fixup: it prevents exponential growth of the + # accumulated state between crossings. Without this renorm, products of N chunk + # FMs can have condition numbers up to (cond_per_chunk)^N, causing catastrophic + # cancellation for large N (N ≳ 20). With renorm, each chunk is applied as a + # Möbius transformation on the bounded S matrix, keeping errors at O(eps × cond_chunk) + # rather than O(eps × cond_chunk^N). [STRIDE ode.F: ode_fixup called after each uAxis step] + # + # last_crossing_step tracks the u_store index of the most recent crossing so that + # the outer plasma (from last rational surface to psilim) can be re-integrated. + last_crossing_step = 1 + for (i, chunk) in enumerate(chunks) + # Forward chunks: apply propagator directly (Φ_fwd maps psi_start → psi_end). + # Backward chunks (crossing chunks with direction=-1): apply inverse of the + # backward propagator. Φ_bwd maps psi_end → psi_start and is well-conditioned; + # its inverse Φ_fwd = Φ_bwd⁻¹ gives accurate forward propagation via LU solve. + if chunk.direction == -1 + apply_propagator_inverse!(odet, propagators[i]) + else + apply_propagator!(odet, propagators[i]) + end + # Renorm to (S, I) after every chunk — equivalent to STRIDE's ode_fixup. + # The state entering each crossing is already in (S, I) form. + renormalize_riccati_inplace!(odet.u, N) + odet.psifac = chunk.psi_end + odet.q = equil.profiles.q_spline(odet.psifac) + + if ctrl.verbose + @info " ψ = $((@sprintf "%.3f" odet.psifac)), q= $((@sprintf "%.3f" odet.q)), max(S) = $((@sprintf "%.2e" maximum(abs, odet.u[:,:,1]))), steps = $(odet.step-1)" + end + + if chunk.needs_crossing + if ctrl.kin_flag + error("kin_flag = true not implemented yet!") + else + # State is already (S, I) from the renorm above. + # riccati_cross_ideal_singular_surf! zeros column ipert_res directly + # (the resonant mode, no GR permutation needed in Riccati form). + riccati_cross_ideal_singular_surf!(odet, ctrl, equil, ffit, intr, chunk.ising) + last_crossing_step = odet.step - 1 # u_store index of the crossing state + end + else + # Save non-crossing end-of-chunk state (now always in (S, I) form) + if odet.step >= size(odet.u_store, 4) + resize_storage!(odet) + end + odet.psi_store[odet.step] = odet.psifac + odet.q_store[odet.step] = odet.q + @views odet.u_store[:, :, :, odet.step] .= odet.u + # ud not available from propagator integration — left as zeros + odet.step += 1 + end + end + + # Re-integrate the outer plasma (from last rational surface crossing to psilim) using + # Riccati for numerical stability and dense checkpoint storage. + # + # FM propagation in the outer plasma (no rational surfaces) is prone to precision loss + # for high N: the solution grows exponentially without renormalization, causing matrix + # condition numbers to grow and wp = U₂·U₁⁻¹ to lose accuracy. Riccati integration + # keeps matrices bounded via periodic renormalization. + # + # Dense checkpoints from this re-integration are also required for findmax_dW_edge! to + # accurately locate the peak dW in the edge region (psiedge < psilim case). + # + # The u_store entry at last_crossing_step contains (U₁_new, U₂_new) stored by + # riccati_cross_ideal_singular_surf! before renormalization; renormalizing here gives + # (S_new, I) as the correct Riccati starting state for the re-integration. + odet.u .= odet.u_store[:, :, :, last_crossing_step] + odet.psifac = odet.psi_store[last_crossing_step] + odet.q = odet.q_store[last_crossing_step] + odet.step = last_crossing_step + 1 + renormalize_riccati_inplace!(odet.u, N) + outer_chunk = IntegrationChunk(; psi_start=odet.psifac, psi_end=intr.psilim * (1 - eps), + needs_crossing=false, ising=0) + riccati_integrate_chunk!(odet, ctrl, equil, ffit, intr, outer_chunk) + # After riccati_integrate_chunk! with needs_crossing=false: + # odet.u is in (S, I) form (renorm'd at end of integration) + # odet.step points to next empty slot; dense checkpoints stored for outer region + + # Find peak dW in edge region (same as standard/Riccati path) + if ctrl.psiedge < intr.psilim + odet.step = findmax_dW_edge!(odet, ctrl, equil, ffit, intr) + trim_storage!(odet) + if ctrl.verbose + @info "Truncating integration at peak edge dW: ψ = $((@sprintf "%.2f" odet.psi_store[odet.step])), q = $((@sprintf "%.2f" odet.q_store[odet.step]))" + end + intr.psilim = odet.psi_store[end] + intr.qlim = odet.q_store[end] + odet.u .= odet.u_store[:, :, :, end] + # The stored state may be a pre-renorm callback snapshot; renorm to (S, I) for free_run! + renormalize_riccati_inplace!(odet.u, N) + else + odet.step -= 1 + trim_storage!(odet) + # odet.u is already in (S, I) from riccati_integrate_chunk! above + end + + # Compute inter-surface Δ' matrix using the STRIDE global BVP. + # Uses the chunk propagators from the parallel phase (all chunks, including outer plasma). + # Only called when there are singular surfaces to couple. + if !ctrl.con_flag && intr.msing > 0 + compute_delta_prime_matrix!(intr, propagators, chunks) + end + + # Evaluate fixed-boundary stability criterion + if ctrl.verbose + @info "Evaluating fixed-boundary stability criterion" + end + odet.nzero = evaluate_stability_criterion!(odet, equil.profiles) + + # transform_u! is called for consistency but is a no-op (ifix=0, no Gaussian reduction) + transform_u!(odet, intr) + + return odet +end diff --git a/src/GeneralizedPerturbedEquilibrium.jl b/src/GeneralizedPerturbedEquilibrium.jl index 93c5fd05..8f16adb7 100755 --- a/src/GeneralizedPerturbedEquilibrium.jl +++ b/src/GeneralizedPerturbedEquilibrium.jl @@ -439,6 +439,53 @@ function write_outputs_to_HDF5( out_h5["singular/ca_left"] = odet.ca_l out_h5["singular/ca_right"] = odet.ca_r + if intr.msing > 0 + # Mode numbers at each surface (jagged — pad with 0 to max_modes width) + max_modes = maximum(s -> length(s.m), intr.sing) + m_matrix = zeros(Int, intr.msing, max_modes) + n_matrix = zeros(Int, intr.msing, max_modes) + for (s, sing) in enumerate(intr.sing) + for i in 1:length(sing.m) + m_matrix[s, i] = sing.m[i] + n_matrix[s, i] = sing.n[i] + end + end + out_h5["singular/m"] = m_matrix + out_h5["singular/n"] = n_matrix + end + + # Write Δ' if computed (one complex value per resonant mode per singular surface) + if intr.msing > 0 && all(s -> !isempty(s.delta_prime), intr.sing) + max_modes = maximum(s -> length(s.delta_prime), intr.sing) + dp_matrix = zeros(ComplexF64, intr.msing, max_modes) + for (s, sing) in enumerate(intr.sing) + for i in 1:length(sing.delta_prime) + dp_matrix[s, i] = sing.delta_prime[i] + end + end + out_h5["singular/delta_prime"] = dp_matrix + end + + # Write full off-diagonal Δ' column if computed (Riccati/parallel FM paths only). + # Shape: [numpert_total × max_modes × msing], where delta_prime_col[:, i, s] is + # the coupling of all N modes to resonant mode i at surface s. + if intr.msing > 0 && all(s -> !isempty(s.delta_prime_col), intr.sing) + N = size(intr.sing[1].delta_prime_col, 1) + max_modes = maximum(s -> size(s.delta_prime_col, 2), intr.sing) + dp_col_tensor = zeros(ComplexF64, N, max_modes, intr.msing) + for (s, sing) in enumerate(intr.sing) + n_res = size(sing.delta_prime_col, 2) + dp_col_tensor[:, 1:n_res, s] = sing.delta_prime_col + end + out_h5["singular/delta_prime_col"] = dp_col_tensor + end + + # Write inter-surface Δ' matrix if computed (parallel FM path only). + # Shape: [2·msing × 2·msing] where rows/columns index (surface, side) pairs. + if intr.msing > 0 && !isempty(intr.delta_prime_matrix) + out_h5["singular/delta_prime_matrix"] = intr.delta_prime_matrix + end + # Write vacuum data; always write all entries, using empty arrays when not computed out_h5["vacuum/wt"] = ctrl.vac_flag ? vac_data.wt : ComplexF64[] out_h5["vacuum/wt0"] = ctrl.vac_flag ? vac_data.wt0 : ComplexF64[] diff --git a/test/runtests.jl b/test/runtests.jl index 2efa4098..06d4daf7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,6 +24,8 @@ else include("./runtests_vacuum.jl") include("./runtests_equil.jl") include("./runtests_eulerlagrange.jl") + include("./runtests_riccati.jl") + include("./runtests_parallel_integration.jl") include("./runtests_sing.jl") include("./runtests_fullruns.jl") end diff --git a/test/runtests_parallel_integration.jl b/test/runtests_parallel_integration.jl new file mode 100644 index 00000000..4a85d76c --- /dev/null +++ b/test/runtests_parallel_integration.jl @@ -0,0 +1,446 @@ +using LinearAlgebra +using TOML + +@testset "Parallel FM Integration Tests" begin + + @testset "ChunkPropagator identity on trivial interval" begin + # Integrating over a zero-width interval should give the identity propagator. + # We test that apply_propagator! on an identity state preserves the state. + N = 3 + prop = GeneralizedPerturbedEquilibrium.ForceFreeStates.ChunkPropagator(N) + + # Set propagator to identity (block_upper_ic = (I, 0), block_lower_ic = (0, I)) + for i in 1:N + prop.block_upper_ic[i, i, 1] = 1 # U1 block from IC=(I,0) + prop.block_lower_ic[i, i, 2] = 1 # U2 block from IC=(0,I) + end + + # Apply identity propagator to an arbitrary state + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.OdeState(N, 10, 5, 0) + u1_in = [1.0+0.5im 0.2im 0.0; + 0.1+0.1im 1.2+0.1im 0.0; + 0.0im 0.0 0.9+0.3im] + u2_in = [0.8+0.1im 0.1im 0.0; + 0.0im 1.0+0.2im 0.1; + 0.1im 0.0 1.1+0.0im] + odet.u[:, :, 1] .= u1_in + odet.u[:, :, 2] .= u2_in + + GeneralizedPerturbedEquilibrium.ForceFreeStates.apply_propagator!(odet, prop) + + @test odet.u[:, :, 1] ≈ u1_in rtol=1e-12 + @test odet.u[:, :, 2] ≈ u2_in rtol=1e-12 + end + + @testset "apply_propagator! linearity" begin + # Verify that apply_propagator! applies the correct linear map. + N = 3 + prop = GeneralizedPerturbedEquilibrium.ForceFreeStates.ChunkPropagator(N) + + # Fill block_upper_ic and block_lower_ic with random data + rng_upper = [1.1+0.2im 0.1im 0.05; + 0.0im 0.9+0.3im 0.1; + 0.2+0.1im 0.0 1.0+0.1im] + rng_lower = [0.8+0.1im 0.1im 0.0; + 0.0im 1.2+0.2im 0.1; + 0.0im 0.1 0.9+0.1im] + prop.block_upper_ic[:, :, 1] .= rng_upper + prop.block_upper_ic[:, :, 2] .= 0.5 * rng_upper + prop.block_lower_ic[:, :, 1] .= 0.3 * rng_lower + prop.block_lower_ic[:, :, 2] .= rng_lower + + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.OdeState(N, 10, 5, 0) + u1_in = 0.5 * I(N) .+ 0.1im * ones(N, N) + u2_in = I(N) .+ 0.2im * ones(N, N) + odet.u[:, :, 1] .= u1_in + odet.u[:, :, 2] .= u2_in + + GeneralizedPerturbedEquilibrium.ForceFreeStates.apply_propagator!(odet, prop) + + # Manual computation of expected result + U1_upper = prop.block_upper_ic[:, :, 1] + U2_upper = prop.block_upper_ic[:, :, 2] + U1_lower = prop.block_lower_ic[:, :, 1] + U2_lower = prop.block_lower_ic[:, :, 2] + u1_expected = U1_upper * u1_in + U1_lower * u2_in + u2_expected = U2_upper * u1_in + U2_lower * u2_in + + @test odet.u[:, :, 1] ≈ u1_expected rtol=1e-12 + @test odet.u[:, :, 2] ≈ u2_expected rtol=1e-12 + end + + @testset "apply_propagator_inverse! is inverse of apply_propagator!" begin + # Verify that apply_propagator_inverse! is the algebraic inverse of apply_propagator!: + # applying inverse then forward should recover the original state exactly. + # This checks the LU-solve path: Φ \ (Φ * u) = u for an arbitrary invertible Φ. + N = 3 + prop = GeneralizedPerturbedEquilibrium.ForceFreeStates.ChunkPropagator(N) + + # Near-identity blocks guarantee the 2N×2N matrix [A B; C D] is invertible + A = I(N) .+ 0.15 * [1.0+0.2im 0.1im 0.05; 0.0im 0.9+0.3im 0.1; 0.2+0.1im 0.0 1.0+0.1im] + B = 0.1 * [0.8+0.1im 0.1im 0.0; 0.0im 1.2+0.2im 0.1; 0.0im 0.1 0.9+0.1im] + C = 0.1 * [0.5+0.1im 0.0im 0.1; 0.1im 0.8+0.2im 0.0; 0.0im 0.0 0.7+0.1im] + D = I(N) .+ 0.15 * [0.9+0.1im 0.0im 0.05; 0.0im 1.0+0.2im 0.0; 0.1+0.1im 0.0 0.95+0.1im] + + prop.block_upper_ic[:, :, 1] .= A + prop.block_lower_ic[:, :, 1] .= B + prop.block_upper_ic[:, :, 2] .= C + prop.block_lower_ic[:, :, 2] .= D + + u1_in = [1.0+0.5im 0.2im 0.0; + 0.1+0.1im 1.2+0.1im 0.0; + 0.0im 0.0 0.9+0.3im] + u2_in = I(N) .+ 0.1im * ones(N, N) + + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.OdeState(N, 10, 5, 0) + odet.u[:, :, 1] .= u1_in + odet.u[:, :, 2] .= u2_in + + # Round-trip: inverse then forward = identity + GeneralizedPerturbedEquilibrium.ForceFreeStates.apply_propagator_inverse!(odet, prop) + GeneralizedPerturbedEquilibrium.ForceFreeStates.apply_propagator!(odet, prop) + + @test odet.u[:, :, 1] ≈ u1_in rtol=1e-12 + @test odet.u[:, :, 2] ≈ u2_in rtol=1e-12 + end + + @testset "balance_integration_chunks produces target count" begin + # Verify that balance_integration_chunks creates at least + # max(2*msing+3, 4*nthreads) chunks from a small set of base chunks. + ex = joinpath(@__DIR__, "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.OdeState(intr.numpert_total, ctrl.numsteps_init, ctrl.numunorms_init, intr.msing) + GeneralizedPerturbedEquilibrium.ForceFreeStates.initialize_el_at_axis!(odet, ctrl, equil.profiles, intr) + + base_chunks = GeneralizedPerturbedEquilibrium.ForceFreeStates.chunk_el_integration_bounds(odet, ctrl, intr) + balanced = GeneralizedPerturbedEquilibrium.ForceFreeStates.balance_integration_chunks(base_chunks, ctrl, intr) + + target_n = max(2 * intr.msing + 3, 4 * Threads.nthreads()) + + # After balancing, should have at least target_n chunks + @test length(balanced) >= min(target_n, length(base_chunks) * 50) + + # First chunk starts at the correct position, last chunk ends at the edge + @test balanced[1].psi_start ≈ base_chunks[1].psi_start + @test balanced[end].psi_end ≈ base_chunks[end].psi_end + + # Consecutive chunks are contiguous UNLESS the previous chunk ends with a + # crossing (needs_crossing=true), in which case there is an intentional inner-layer + # gap of ≈2·singfac_min/|n·q1| between the pre-crossing and post-crossing intervals. + for i in eachindex(balanced)[2:end] + if !balanced[i-1].needs_crossing + @test balanced[i].psi_start ≈ balanced[i-1].psi_end rtol=1e-10 + else + # Inner-layer gap: post-crossing chunk starts AFTER the rational surface + @test balanced[i].psi_start > balanced[i-1].psi_end + end + end + + # The total number of needs_crossing=true chunks should equal the original + n_crossings_base = count(c -> c.needs_crossing, base_chunks) + n_crossings_bal = count(c -> c.needs_crossing, balanced) + @test n_crossings_bal == n_crossings_base + end + + @testset "chunk_el_integration_bounds direction field — bidirectional mode" begin + # Verify that bidirectional=true sets direction=-1 on crossing chunks and direction=+1 + # on non-crossing chunks, and that balance_integration_chunks propagates these correctly: + # the right sub-chunk inherits direction from the parent, the left sub-chunk is always +1. + ex = joinpath(@__DIR__, "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.OdeState(intr.numpert_total, ctrl.numsteps_init, ctrl.numunorms_init, intr.msing) + GeneralizedPerturbedEquilibrium.ForceFreeStates.initialize_el_at_axis!(odet, ctrl, equil.profiles, intr) + + # Default (bidirectional=false): all chunks should have direction=+1 + chunks_fwd = GeneralizedPerturbedEquilibrium.ForceFreeStates.chunk_el_integration_bounds(odet, ctrl, intr) + @test all(c -> c.direction == 1, chunks_fwd) + + # bidirectional=true: crossing chunks direction=-1, non-crossing direction=+1 + chunks_bidi = GeneralizedPerturbedEquilibrium.ForceFreeStates.chunk_el_integration_bounds(odet, ctrl, intr; bidirectional=true) + @test count(c -> c.needs_crossing, chunks_bidi) > 0 # at least one crossing chunk + for chunk in chunks_bidi + if chunk.needs_crossing + @test chunk.direction == -1 + else + @test chunk.direction == 1 + end + end + + # balance_integration_chunks preserves direction: right sub-chunk inherits parent direction, + # left sub-chunk is always +1 regardless of parent + balanced_bidi = GeneralizedPerturbedEquilibrium.ForceFreeStates.balance_integration_chunks(chunks_bidi, ctrl, intr) + for chunk in balanced_bidi + if chunk.needs_crossing + @test chunk.direction == -1 + else + @test chunk.direction == 1 + end + end + end + + @testset "Parallel FM integration matches standard ODE — Solovev example" begin + # Run standard and parallel FM integrations on the Solovev regression test. + # The energy eigenvalue et[1] should match to within 2%. + # + # Bidirectional FM integration (crossing chunks integrated backward) is the + # default for use_parallel=true. It keeps FM propagators well-conditioned for + # both small-N (Solovev N=8, tested here) and large-N (DIIID N=26, tested below). + ex = joinpath(@__DIR__, "test_data", "regression_solovev_ideal_example") + + function run_solovev(use_parallel) + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + inputs["ForceFreeStates"]["use_parallel"] = use_parallel + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_matrix(equil, intr, metric) + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.eulerlagrange_integration(ctrl, equil, ffit, intr) + vac = GeneralizedPerturbedEquilibrium.ForceFreeStates.free_run!(odet, ctrl, equil, ffit, intr) + return real(vac.et[1]), intr + end + + et_std, intr_std = run_solovev(false) + et_par, intr_par = run_solovev(true) + + # Energy eigenvalue matches to 2% + @test isapprox(et_par, et_std; rtol=0.02) + + # Δ' is populated for every singular surface (finite values) + # Note: the FM parallel path computes Δ' from ca_l/ca_r accumulated in (S,I) + # normalization (Riccati-style crossings). This differs from the sequential path's + # (U1,U2) normalization, so absolute Δ' values are not compared here. + @test all(s -> !isempty(s.delta_prime), intr_par.sing) + @test all(s -> all(isfinite, s.delta_prime), intr_par.sing) + + # delta_prime_col is populated and has the correct shape (N × n_res_modes) + N = intr_par.numpert_total + @test all(s -> !isempty(s.delta_prime_col), intr_par.sing) + @test all(s -> size(s.delta_prime_col, 1) == N, intr_par.sing) + @test all(s -> size(s.delta_prime_col, 2) == length(s.delta_prime), intr_par.sing) + + # Diagonal of delta_prime_col matches delta_prime (consistency check) + for s in intr_par.sing + ipert_res_vals = 1 .+ s.m .- intr_par.mlow .+ (s.n .- intr_par.nlow) .* intr_par.mpert + for (i, ipr) in enumerate(ipert_res_vals) + @test s.delta_prime_col[ipr, i] ≈ s.delta_prime[i] rtol=1e-10 + end + end + end + + @testset "Parallel FM integration matches standard ODE — DIIID-like example (large N)" begin + # Run standard and parallel FM integrations on the DIIID-like example (N≈26 modes). + # Before bidirectional integration, the all-forward FM propagators were ill-conditioned + # for large N, producing ~10% energy error. Bidirectional integration (backward crossing + # chunks + forward intermediate chunks) restores accuracy to within 2%. + # + # This is the key regression test for the bidirectional parallel FM fix. + ex = joinpath(@__DIR__, "..", "examples", "DIIID-like_ideal_example") + + function run_diiid(use_parallel) + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + inputs["ForceFreeStates"]["use_parallel"] = use_parallel + inputs["ForceFreeStates"]["write_outputs_to_HDF5"] = false + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_matrix(equil, intr, metric) + odet = GeneralizedPerturbedEquilibrium.ForceFreeStates.eulerlagrange_integration(ctrl, equil, ffit, intr) + vac = GeneralizedPerturbedEquilibrium.ForceFreeStates.free_run!(odet, ctrl, equil, ffit, intr) + return real(vac.et[1]) + end + + et_std = run_diiid(false) + et_par = run_diiid(true) + + # Energy eigenvalue matches to 2% (bidirectional fix: was ~10% error without it) + @test isapprox(et_par, et_std; rtol=0.02) + end + + @testset "ode_itime_cost is additive over sub-intervals" begin + # Verify cost(a, c) ≈ cost(a, b) + cost(b, c) for b ∈ (a, c) where no + # rational surface is inside [a, c]. The cost function uses abs(Δlog) for + # each reference point; this is additive only when |psi - ref| is monotone + # on [a, c], i.e., when no reference (rational surface, axis, edge) lies + # strictly inside the interval. We use the first integration chunk from + # chunk_el_integration_bounds, which is guaranteed to contain no rational + # surfaces in its interior. + ex = joinpath(@__DIR__, "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mpert = 8; intr.numpert_total = 8 + + # Use the first chunk from chunk_el_integration_bounds: guaranteed rational-free interior + odet_tmp = GeneralizedPerturbedEquilibrium.ForceFreeStates.OdeState(8, 10, 5, intr.msing) + GeneralizedPerturbedEquilibrium.ForceFreeStates.initialize_el_at_axis!(odet_tmp, ctrl, equil.profiles, intr) + chunks_tmp = GeneralizedPerturbedEquilibrium.ForceFreeStates.chunk_el_integration_bounds(odet_tmp, ctrl, intr) + chunk1 = chunks_tmp[1] + a = chunk1.psi_start + c = chunk1.psi_end + b = (a + c) / 2.0 + + cost_ac = GeneralizedPerturbedEquilibrium.ForceFreeStates.ode_itime_cost(a, c, intr) + cost_ab = GeneralizedPerturbedEquilibrium.ForceFreeStates.ode_itime_cost(a, b, intr) + cost_bc = GeneralizedPerturbedEquilibrium.ForceFreeStates.ode_itime_cost(b, c, intr) + + @test isapprox(cost_ac, cost_ab + cost_bc; rtol=1e-10) + end + + @testset "delta_prime_matrix — STRIDE BVP Solovev regression" begin + # Verify that the parallel FM path computes a well-formed inter-surface Δ' matrix + # via the STRIDE global BVP [Glasser 2018 Phys. Plasmas 25, 032501]. + # Shape: (2·msing × 2·msing), where index 2j-1 = left side and 2j = right side + # of surface j. Each entry is the U₂[ipert_res] response amplitude for one + # driving configuration. + ex = joinpath(@__DIR__, "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + inputs["ForceFreeStates"]["use_parallel"] = true + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_matrix(equil, intr, metric) + GeneralizedPerturbedEquilibrium.ForceFreeStates.eulerlagrange_integration(ctrl, equil, ffit, intr) + + msing = intr.msing + dpm = intr.delta_prime_matrix + + # Matrix is populated with correct shape (2·msing × 2·msing) + @test !isempty(dpm) + @test size(dpm) == (2 * msing, 2 * msing) + + # All elements are finite + @test all(isfinite, dpm) + + # Diagonal (self-response) elements are non-zero for each surface side + for j in 1:msing + @test abs(dpm[2j-1, 2j-1]) > 1e-10 + @test abs(dpm[2j, 2j ]) > 1e-10 + end + end + + @testset "delta_prime_matrix — STRIDE BVP DIIID-like regression (large N)" begin + # Verify that the parallel FM path computes a well-formed inter-surface Δ' matrix + # for the DIIID-like case (N≈26 modes, multiple rational surfaces). This complements + # the Solovev test above by exercising the BVP assembly with more surfaces and larger + # mode space, where ill-conditioned (non-bidirectional) FM propagators would fail. + ex = joinpath(@__DIR__, "..", "examples", "DIIID-like_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + inputs["ForceFreeStates"]["use_parallel"] = true + inputs["ForceFreeStates"]["write_outputs_to_HDF5"] = false + intr = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesInternal(; dir_path=ex) + ctrl = GeneralizedPerturbedEquilibrium.ForceFreeStates.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + eq_config = GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium(eq_config) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + GeneralizedPerturbedEquilibrium.ForceFreeStates.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + metric = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_metric(equil; mband=intr.mband, fft_flag=ctrl.fft_flag) + ffit = GeneralizedPerturbedEquilibrium.ForceFreeStates.make_matrix(equil, intr, metric) + GeneralizedPerturbedEquilibrium.ForceFreeStates.eulerlagrange_integration(ctrl, equil, ffit, intr) + + msing = intr.msing + dpm = intr.delta_prime_matrix + + # Matrix is populated with correct shape (2·msing × 2·msing) + @test !isempty(dpm) + @test size(dpm) == (2 * msing, 2 * msing) + + # All elements are finite + @test all(isfinite, dpm) + + # Diagonal (self-response) elements are non-zero for each surface side + for j in 1:msing + @test abs(dpm[2j-1, 2j-1]) > 1e-10 + @test abs(dpm[2j, 2j ]) > 1e-10 + end + end + +end diff --git a/test/runtests_riccati.jl b/test/runtests_riccati.jl new file mode 100644 index 00000000..5681b691 --- /dev/null +++ b/test/runtests_riccati.jl @@ -0,0 +1,251 @@ +using LinearAlgebra, Random, TOML + +const FFS = GeneralizedPerturbedEquilibrium.ForceFreeStates + +# Configure a fresh ForceFreeStatesInternal from an already-built equilibrium. +# Cheap (sing_lim! + sing_find! + field assignment). Separate from equil/ffit +# setup because intr is mutated by each integration (sing[s].delta_prime etc.). +function make_solovev_intr(inputs, ctrl, equil, ex) + intr = FFS.ForceFreeStatesInternal(; dir_path=ex) + intr.wall_settings = GeneralizedPerturbedEquilibrium.Vacuum.WallShapeSettings(; + (Symbol(k) => v for (k, v) in inputs["Wall"])...) + FFS.sing_lim!(intr, ctrl, equil) + intr.nlow = ctrl.nn_low; intr.nhigh = ctrl.nn_high; intr.npert = 1 + FFS.sing_find!(intr, equil) + intr.mlow = min(intr.nlow * equil.params.qmin, 0) - 4 - ctrl.delta_mlow + intr.mhigh = trunc(Int, intr.nhigh * equil.params.qmax) + ctrl.delta_mhigh + intr.mpert = intr.mhigh - intr.mlow + 1 + intr.mband = intr.mpert - 1 + intr.numpert_total = intr.mpert * intr.npert + return intr +end + +@testset "Riccati Integration Tests" begin + + # ── Pure matrix unit tests — no equilibrium needed ──────────────────────── + + @testset "renormalize_riccati_inplace!" begin + N = 4 + # Build a random (U₁, U₂) pair and verify renorm gives S = U₁·U₂⁻¹ with U₂_new = I + rng = [1.0+0.5im 0.2im 0.1 0.3im; + 0.0 1.2+0.1im 0.0im 0.2; + 0.1+0.1im 0.0 0.9+0.3im 0.1im; + 0.0im 0.2 0.0 1.1+0.2im] + U1 = rng .+ 0.5*I(N) + U2 = 0.5*rng .+ I(N) # near-identity to ensure invertibility + + u = zeros(ComplexF64, N, N, 2) + u[:, :, 1] .= U1 + u[:, :, 2] .= U2 + + S_expected = U1 / U2 # = U₁ · U₂⁻¹ + + FFS.renormalize_riccati_inplace!(u, N) + + @test u[:, :, 2] ≈ I(N) + @test u[:, :, 1] ≈ S_expected rtol=1e-12 + end + + @testset "renormalize_riccati_inplace! idempotent" begin + N = 3 + # If U₂ = I already, renorm should leave u unchanged + S = [1.0+0.5im 0.2im 0.1; + 0.0im 1.2+0.1im 0.0; + 0.1+0.1im 0.0 0.9+0.3im] + u = zeros(ComplexF64, N, N, 2) + u[:, :, 1] .= S + u[:, :, 2] .= I(N) + + FFS.renormalize_riccati_inplace!(u, N) + + @test u[:, :, 2] ≈ I(N) + @test u[:, :, 1] ≈ S rtol=1e-12 + end + + @testset "renormalize_riccati! (OdeState)" begin + N = 3 + rng = [1.0+0.5im 0.2im 0.1; + 0.0im 1.2+0.1im 0.0; + 0.1+0.1im 0.0 0.9+0.3im] + U1 = rng .+ 0.5*I(N) + U2 = 0.2*rng .+ I(N) + + odet = FFS.OdeState(N, 10, 5, 1) + odet.u[:, :, 1] .= U1 + odet.u[:, :, 2] .= U2 + + S_expected = U1 / U2 + intr = FFS.ForceFreeStatesInternal(; mpert=N, numpert_total=N) + + FFS.renormalize_riccati!(odet, intr) + + @test odet.u[:, :, 2] ≈ I(N) + @test odet.u[:, :, 1] ≈ S_expected rtol=1e-12 + end + + # ── Shared Solovev setup ────────────────────────────────────────────────── + # + # equil (Grad-Shafranov solve) and ffit (metric matrices) are expensive and + # immutable after construction — built ONCE and shared across all tests below. + # intr is cheap to (re)initialize but is mutated by each integration run + # (sing[s].delta_prime etc.), so a fresh copy is made for each integration. + # + # Integration runs: + # intr_ric / odet_ric — Riccati path (shared by most tests) + # intr_std / odet_std — Standard path (energy comparison only) + + ex = joinpath(@__DIR__, "test_data", "regression_solovev_ideal_example") + inputs = TOML.parsefile(joinpath(ex, "gpec.toml")) + inputs["ForceFreeStates"]["verbose"] = false + + ctrl = FFS.ForceFreeStatesControl(; + (Symbol(k) => v for (k, v) in inputs["ForceFreeStates"])...) + equil = GeneralizedPerturbedEquilibrium.Equilibrium.setup_equilibrium( + GeneralizedPerturbedEquilibrium.Equilibrium.EquilibriumConfig(inputs["Equilibrium"], ex)) + + intr_tmp = make_solovev_intr(inputs, ctrl, equil, ex) + metric = FFS.make_metric(equil; mband=intr_tmp.mband, fft_flag=ctrl.fft_flag) + ffit = FFS.make_matrix(equil, intr_tmp, metric) + N = intr_tmp.numpert_total + + # Riccati integration + intr_ric = make_solovev_intr(inputs, ctrl, equil, ex) + odet_ric = FFS.riccati_eulerlagrange_integration(ctrl, equil, ffit, intr_ric) + + # Save inline Δ' values before any test that calls compute_delta_prime_from_ca! + # (which overwrites intr_ric.sing[s].delta_prime) + delta_prime_inline = [copy(intr_ric.sing[s].delta_prime) for s in 1:intr_ric.msing] + + vac_ric = FFS.free_run!(odet_ric, ctrl, equil, ffit, intr_ric) + et_ric = real(vac_ric.et[1]) + + # Standard integration (needed only for energy comparison) + intr_std = make_solovev_intr(inputs, ctrl, equil, ex) + odet_std = FFS.eulerlagrange_integration(ctrl, equil, ffit, intr_std) + vac_std = FFS.free_run!(odet_std, ctrl, equil, ffit, intr_std) + et_std = real(vac_std.et[1]) + + # ───────────────────────────────────────────────────────────────────────── + + @testset "Riccati integration matches standard ODE — Solovev example" begin + # Energy eigenvalue matches to 1% + @test isapprox(et_ric, et_std; rtol=0.01) + + # Riccati uses no more than 2x as many steps as standard + @test odet_ric.step <= 2 * odet_std.step + end + + @testset "Δ' computed by Riccati path — Solovev regression" begin + # Verify that the Riccati path populates delta_prime with physically correct values. + # + # The Riccati path computes Δ' in the bounded (U₁, U₂) normalization: before the + # crossing, the callback guarantees max(|U₁|, |U₂|) ≤ ucrit, and the asymptotic is + # introduced directly in column ipert_res (no GR permutation). This gives: + # ca_r[ipert_res, ipert_res, 2] = 1 (exactly, by construction) + # Δ' = (1 - ca_l[ipert_res, ipert_res, 2]) / (4π²·psio) + # + # The standard path uses Gaussian Reduction which inflates the resonant column's + # asymptotic coefficients, so it does NOT populate intr.sing[s].delta_prime. + # Use SingularCoupling.jl (which reads ca_l/ca_r directly) for standard-path Δ'. + + # Riccati path should populate delta_prime for every singular surface + @test all(s -> !isempty(s.delta_prime), intr_ric.sing) + + # All Riccati Δ' values should be finite + @test all(s -> all(isfinite, s.delta_prime), intr_ric.sing) + + # Regression: Solovev Δ' values (in the bounded Riccati normalization). + # Positive Δ' (surface 1) and negative Δ' (surface 2) are both physically plausible + # for this configuration. + @test isapprox(real(intr_ric.sing[1].delta_prime[1]), 57.3; rtol=0.05) + @test isapprox(real(intr_ric.sing[2].delta_prime[1]), -4.03; rtol=0.05) + + # delta_prime_col is populated, has correct shape (N × n_res_modes), and + # its diagonal elements match delta_prime exactly. + @test all(s -> !isempty(s.delta_prime_col), intr_ric.sing) + @test all(s -> size(s.delta_prime_col, 1) == N, intr_ric.sing) + @test all(s -> size(s.delta_prime_col, 2) == length(s.delta_prime), intr_ric.sing) + for s in intr_ric.sing + ipert_res_vals = 1 .+ s.m .- intr_ric.mlow .+ (s.n .- intr_ric.nlow) .* intr_ric.mpert + for (i, ipr) in enumerate(ipert_res_vals) + @test s.delta_prime_col[ipr, i] ≈ s.delta_prime[i] rtol=1e-10 + end + end + end + + @testset "Riccati end state has U₂ ≈ I" begin + # After riccati_eulerlagrange_integration, odet.u[:,:,2] should be identity + # (canonical Riccati convention after final renorm) + @test odet_ric.u[:, :, 2] ≈ I(N) rtol=1e-10 + end + + @testset "riccati_der! formula — Glasser 2018 Eq. 19" begin + # Verify riccati_der! correctly evaluates dS/dψ = w†·F̄⁻¹·w − S·Ḡ·S, w = Q − K̄·S. + # + # Test states are Hermitian (physical constraint: the EL system preserves S†=S from + # the axis). Non-Hermitian states would give ~5% disagreement — not a bug, but a + # consequence of the derivation assuming the physical symmetry. + # + # See benchmarks/benchmark_riccati_der.jl for the extended version with output. + + # Use an initialized OdeState just for spline_hint and chunk bounds + odet_tmp = FFS.OdeState(N, ctrl.numsteps_init, ctrl.numunorms_init, intr_ric.msing) + FFS.initialize_el_at_axis!(odet_tmp, ctrl, equil.profiles, intr_ric) + chunks = FFS.chunk_el_integration_bounds(odet_tmp, ctrl, intr_ric) + + # 30% into each chunk: away from singularities at psi_end + test_psis = [c.psi_start + 0.3 * (c.psi_end - c.psi_start) for c in chunks] + + rng = Random.MersenneTwister(42) + for psi in test_psis + # Hermitian S: physical Riccati matrix is Hermitian (preserved by EL symmetry) + A = randn(rng, ComplexF64, N, N) + S = (A + A') / 2 + + # Manual RHS: w†·F̄⁻¹·w − S·Ḡ·S + L = zeros(ComplexF64, N, N) + Kmat = zeros(ComplexF64, N, N) + Gmat = zeros(ComplexF64, N, N) + ffit.fmats_lower(vec(L), psi; hint=ffit._hint) + ffit.kmats(vec(Kmat), psi; hint=ffit._hint) + ffit.gmats(vec(Gmat), psi; hint=ffit._hint) + q = equil.profiles.q_spline(psi) + singfac = vec(1.0 ./ ((intr_ric.mlow:intr_ric.mhigh) .- + q .* (intr_ric.nlow:intr_ric.nhigh)')) + w = -Kmat * S + for i in 1:N; w[i, i] += singfac[i]; end + v = copy(w) + ldiv!(LowerTriangular(L), v) + ldiv!(UpperTriangular(L'), v) + dS_manual = adjoint(w) * v - S * Gmat * S + + # riccati_der! RHS + u_ric = zeros(ComplexF64, N, N, 2) + du_ric = zeros(ComplexF64, N, N, 2) + u_ric[:, :, 1] .= S + u_ric[:, :, 2] .= Matrix{ComplexF64}(I, N, N) + dummy = FFS.IntegrationChunk(psi, psi, false, 0, 1) + params = (ctrl, equil, ffit, intr_ric, odet_tmp, dummy) + FFS.riccati_der!(du_ric, u_ric, params, psi) + + rel_err = norm(du_ric[:, :, 1] - dS_manual) / max(norm(dS_manual), 1e-10) + @test rel_err < 1e-10 + end + end + + @testset "compute_delta_prime_from_ca! matches inline Δ'" begin + # Verify the standalone Δ' formula matches the inline Riccati crossing computation. + # Both apply the identical diagonal formula to the same ca_l/ca_r arrays, so the + # result must be bit-for-bit identical (not just approximately equal). + # + # Note: this call overwrites intr_ric.sing[s].delta_prime; delta_prime_inline was + # saved before free_run! above so it holds the original inline values. + # + # See benchmarks/benchmark_delta_prime_methods.jl for the extended version. + FFS.compute_delta_prime_from_ca!(odet_ric, intr_ric, equil) + for s in 1:intr_ric.msing + @test intr_ric.sing[s].delta_prime == delta_prime_inline[s] + end + end + +end