This example shows how to calculate the $\text{GALI}_k$ indicator for the Fermi-Pasta-Ulam system using an adaptive integrator.


In [1]:
# Import libraries
import numpy as np
from numba import cuda, float64
import math
import time
import sys
import os
try:
    from chaoticus import (create_solver_kernel_adaptive_variational,
                                 calc_GALI)
except ImportError:
    print("Error: Could not import functions from 'chaoticus.py'.")
    print("Make sure the file exists and is in the correct path.")
    sys.exit(1)

The Fermi-Pasta-Ulam system Hamiltonian is given by:

$$
    \mathcal{H}(\boldsymbol{q}, \boldsymbol{p}, k, \alpha, \beta) = \sum_{i = 1}^{N} \dfrac{p^2_{i}}{2} + \dfrac{k}{2} \sum_{i = 0}^{N} \left( q_{i+1} - q_{i} \right)^2 + \dfrac{\alpha}{3} \sum_{i = 0}^{N} \left( q_{i+1} - q_{i} \right)^3 + \dfrac{\beta}{4} \sum_{i = 0}^{N} \left( q_{i+1} - q_{i} \right)^4
$$
where $N$ is the number of particles (DoF of the system), $q_i$ and $p_i$ are the position and momentum of the $i$-th particle, $k$ is the linear coupling constant, and $\alpha$ and $\beta$ are the nonlinear coefficients. Fixed boundary conditions, $q_0 = q_{N+1} = 0$, are assumed, so the potential sums run over the $N + 1$ springs of the chain.

Hamilton's equations of motion are:
$$
    \begin{cases}
        \dot{q}_i &= \frac{\partial \mathcal{H}}{\partial p_i} = p_i \\[.2cm]
        \dot{p}_i &= - \frac{\partial \mathcal{H}}{\partial q_i} = - k \left[ \Delta q_i - \Delta q_{i+1} \right] - \alpha \left[ (\Delta q_i)^2 - (\Delta q_{i+1})^2 \right] - \beta \left[ (\Delta q_i)^3 - (\Delta q_{i+1})^3 \right]
    \end{cases}
$$
with $\Delta q_i = q_i - q_{i-1}$ and $\Delta q_{i+1} = q_{i+1} - q_{i}$.

For this system, the variational equations are:

$$
    \begin{cases}
        \dot{\delta q}_i &= \delta p_i \\
        \dot{\delta p}_i &= - \sum_{j = 1}^{N} \dfrac{\partial^{2} \mathcal{H}}{\partial q_i \partial q_j} \delta q_j
    \end{cases}
$$
and the three nonzero cases of the second partial derivatives of the Hamiltonian (all other mixed derivatives vanish) are:

$$
    \begin{cases}
        \dfrac{\partial^2 \mathcal{H}}{\partial q^2_{i}} &= 2k + 2\alpha \left( \Delta q_i + \Delta q_{i+1} \right) + 3\beta \left[ \left( \Delta q_i \right)^2 + \left( \Delta q_{i+1} \right)^2 \right] \\[.5cm]
        \dfrac{\partial^2 \mathcal{H}}{\partial q_{i} \partial q_{i-1}} &= - \left[ k + 2\alpha \Delta q_i + 3\beta \left( \Delta q_i \right)^2 \right] \\[.5cm]
        \dfrac{\partial^2 \mathcal{H}}{\partial q_{i} \partial q_{i+1}} &= - \left[ k + 2\alpha \Delta q_{i+1} + 3\beta \left( \Delta q_{i+1} \right)^2 \right]
    \end{cases}
$$

Once the $k$ deviation vectors are computed, the $\text{GALI}_k$ indicator is given by the product of the singular values of the matrix whose columns are the normalized (unit-length) deviation vectors. Note that there are other ways to compute the $\text{GALI}_k$ indicator, but this is probably the most straightforward one.

One theoretical result that might be helpful in some scenarios is that SALI and $\text{GALI}_2$ are equivalent. However, it has recently been shown that $\text{GALI}_2$, and even $\text{GALI}_3$, might not be enough for a correct classification, especially in dissipative systems.

In [2]:
"""
Define the Hamiltonian function
"""
def Hamiltonian(q, p, N, k, a, b):
    """Return the total energy of an FPU chain with both ends fixed.

    The chain has N moving particles. The walls on either side sit at
    position zero, so N + 1 springs contribute to the potential energy.

    Args:
        q: Particle positions (indexable, at least N entries).
        p: Particle momenta (indexable, at least N entries).
        N: Number of particles.
        k: Linear (harmonic) coupling constant.
        a: Cubic nonlinearity coefficient (alpha).
        b: Quartic nonlinearity coefficient (beta).

    Returns:
        Kinetic plus potential energy of the configuration.
    """
    kinetic = 0.0
    for idx in range(N):
        kinetic += 0.5 * p[idx]**2

    # Walk over the N + 1 springs:
    # left wall -> q[0] -> ... -> q[N-1] -> right wall.
    potential = 0.0
    left = 0.0
    for idx in range(N + 1):
        right = q[idx] if idx < N else 0.0
        stretch = right - left
        potential += 0.5 * k * stretch**2 + (a/3.0) * stretch**3 + (b/4.0) * stretch**4
        left = right

    return kinetic + potential

"""
Define auxiliary functions for intermediate calculations
"""
@cuda.jit(device=True, inline=True)
def partial_diag(qi, qi_m1, qi_p1, k, a, b):
    """Diagonal Hessian entry d^2H/dq_i^2 for particle i.

    Args:
        qi: Position of particle i.
        qi_m1: Position of the left neighbour (0.0 at the fixed wall).
        qi_p1: Position of the right neighbour (0.0 at the fixed wall).
        k, a, b: Linear, cubic and quartic coupling coefficients.

    Returns:
        2k + 2a (dl + dr) + 3b (dl^2 + dr^2), where dl and dr are the
        stretches of the left and right springs.
    """
    back = qi - qi_m1   # stretch of the spring to the left neighbour
    fwd = qi_p1 - qi    # stretch of the spring to the right neighbour
    harmonic = 2.0 * k
    cubic = 2.0 * a * (back + fwd)
    quartic = 3.0 * b * (back * back + fwd * fwd)
    return harmonic + cubic + quartic

@cuda.jit(device=True, inline=True)
def partial_non_diag(qi, qj, k, a, b):
    """Off-diagonal Hessian entry coupling two neighbouring particles.

    The result depends on the signed difference ``qi - qj`` (the cubic
    term is odd in it), so the order of the two positions matters.

    Args:
        qi: First position.
        qj: Second position; the spring stretch is taken as ``qi - qj``.
        k, a, b: Linear, cubic and quartic coupling coefficients.

    Returns:
        -(k + 2a d + 3b d^2) with d = qi - qj.
    """
    stretch = qi - qj
    bracket = k + 2.0 * a * stretch + 3.0 * b * stretch * stretch
    return -bracket

@cuda.jit(device=True, inline=True)
def p_i_dot(qi, qi_m1, qi_p1, k, a, b):
    """Force on particle i, i.e. dp_i/dt = -dH/dq_i.

    Args:
        qi: Position of particle i.
        qi_m1: Position of the left neighbour (0.0 at the fixed wall).
        qi_p1: Position of the right neighbour (0.0 at the fixed wall).
        k, a, b: Linear, cubic and quartic coupling coefficients.

    Returns:
        -k (dl - dr) - a (dl^2 - dr^2) - b (dl^3 - dr^3), where dl and dr
        are the stretches of the left and right springs.
    """
    back = qi - qi_m1   # stretch of the spring to the left neighbour
    fwd = qi_p1 - qi    # stretch of the spring to the right neighbour
    harmonic = -k * (back - fwd)
    quad_diff = back * back - fwd * fwd
    cube_diff = back * back * back - fwd * fwd * fwd
    return harmonic - a * quad_diff - b * cube_diff

@cuda.jit(device=True, inline=True)
def build_hessian(q, N, k, a, b, H_mat):
    """Fill the tridiagonal Hessian d^2H/dq_i dq_j into a flat buffer.

    Only the diagonal and the first off-diagonals are written; every other
    entry of the logical N x N matrix is left untouched, so the caller must
    zero the buffer beforehand.

    Args:
        q: Particle positions (at least N entries).
        N: Number of particles; also the row stride of ``H_mat``.
        k, a, b: Linear, cubic and quartic coupling coefficients.
        H_mat: Flat output buffer, indexed row-major as ``H_mat[i * N + j]``.
    """
    for i in range(N):
        qi = q[i]
        # Fixed walls: the neighbours outside the chain sit at position 0.
        qi_m1 = q[i - 1] if i > 0 else 0.0
        qi_p1 = q[i + 1] if i < N - 1 else 0.0
        H_mat[i * N + i] = partial_diag(qi, qi_m1, qi_p1, k, a, b)
        if i > 0:
            # Coupling through the spring between particles i-1 and i.
            # partial_non_diag needs the stretch q_i - q_{i-1} (right minus
            # left) for the cubic term to have the correct sign, so each
            # pair is written exactly once, from its right-hand particle,
            # and mirrored to keep the matrix symmetric.
            off_val = partial_non_diag(qi, qi_m1, k, a, b)
            H_mat[i * N + (i - 1)] = off_val
            H_mat[(i - 1) * N + i] = off_val

"""
Define Hamilton's equations of motion and variational equations
"""
# Maximum number of particles supported by the device code. It has to be a
# module-level (global) constant because it sizes the cuda.local.array buffers
# used by the ODE function; the runtime N passed in params must not exceed it.
N_MAX = 16
# Length of the flat buffer that holds an N_MAX x N_MAX Hessian.
DIM_MATRIX = N_MAX * N_MAX
@cuda.jit(device=True, inline=True)
def ode_fpu_variational_adaptive(t, Y, dYdt, params):
    """
    Right-hand side of the FPU equations of motion, the variational
    equations for M deviation vectors, and one auxiliary LD accumulator.

    Layout of Y and dYdt (length 2*N*(1+M) + 1):
    [q(N), p(N), dq0(N), dp0(N), ..., dqM-1(N), dpM-1(N), LD(1)]

    Args:
        t (float): Current time (unused; the system is autonomous).
        Y (array): Current state vector.
        dYdt (array): Output array that receives the derivatives.
        params (tuple): System parameters (N, M, k, a, b).
    """
    N, M, k, a, b = params
    block = 2 * N  # length of one [q, p] (or [dq, dp]) segment

    # Scratch buffers are sized with the compile-time bound N_MAX;
    # only the first N (or N*N) entries are ever used.
    pos = cuda.local.array(N_MAX, float64)
    mom = cuda.local.array(N_MAX, float64)
    hess = cuda.local.array(DIM_MATRIX, dtype=float64)
    dev_q = cuda.local.array(N_MAX, float64)
    dev_p = cuda.local.array(N_MAX, float64)

    # Snapshot the base state before writing any derivative.
    for i in range(N):
        pos[i] = Y[i]
        mom[i] = Y[N + i]

    # Hamilton's equations: dq/dt = p, dp/dt = force from both springs.
    for i in range(N):
        dYdt[i] = mom[i]
        left = pos[i - 1] if i > 0 else 0.0
        right = pos[i + 1] if i < N - 1 else 0.0
        dYdt[N + i] = p_i_dot(pos[i], left, right, k, a, b)

    # LD slot (last entry): sum of sqrt(|derivative|) over the 2N base
    # variables; the small epsilon keeps the sqrt argument strictly positive.
    ld_rate = 0.0
    for i in range(block):
        ld_rate += math.sqrt(abs(dYdt[i]) + 1e-16)
    dYdt[block * (1 + M)] = ld_rate

    # Hessian of the potential; build_hessian only writes the tridiagonal
    # band, so the logical N x N part is cleared first.
    for i in range(N * N):
        hess[i] = 0.0
    build_hessian(pos, N, k, a, b, hess)

    # Variational equations for each deviation vector:
    # d(dq)/dt = dp and d(dp)/dt = -Hessian . dq
    for m in range(M):
        offset = block * (m + 1)
        for i in range(N):
            dev_q[i] = Y[offset + i]
            dev_p[i] = Y[offset + N + i]
        for i in range(N):
            dYdt[offset + i] = dev_p[i]
            acc = 0.0
            for j in range(N):
                # Row stride is N (not N_MAX) for the logical matrix.
                acc += hess[i * N + j] * dev_q[j]
            dYdt[offset + N + i] = -acc

For this system, we will generate the initial conditions using a Monte Carlo (MC) approach, i.e. by random sampling.

In [3]:
"""
Define the function to generate initial conditions
"""
def create_initial_conditions_FPU(N, M, n_ics=1, seed=12345):
    """Generate random FPU initial states with M seeded deviation vectors.

    Each row has the layout
    ``[q(N), p(N), dq0(N), dp0(N), ..., dqM-1(N), dpM-1(N), LD]``.
    Positions and momenta are drawn uniformly from [-1, 1). Deviation
    vector m is the m-th canonical basis vector of the 2N-dimensional
    tangent space scaled by 1e-5, so for every M <= 2N the initial
    deviation vectors are linearly independent. For M <= N this is the
    same seeding as perturbing position ``m`` only.

    Args:
        N: Number of particles.
        M: Number of deviation vectors.
        n_ics: Number of initial conditions (rows) to generate.
        seed: Seed for ``numpy.random.default_rng``.

    Returns:
        np.ndarray of shape ``(n_ics, 2*N*(1 + M) + 1)`` and dtype float64.
    """
    rng = np.random.default_rng(seed)
    dof = 2 * N
    total_vars = dof * (1 + M) + 1
    # Starting from zeros also leaves the trailing LD slot at 0.0.
    Y0 = np.zeros((n_ics, total_vars), dtype=np.float64)

    # Draw q then p for each initial condition, one row at a time, so the
    # random stream is consumed in a fixed, reproducible order.
    for ic_idx in range(n_ics):
        Y0[ic_idx, :N] = rng.uniform(-1, 1, N)
        Y0[ic_idx, N:dof] = rng.uniform(-1, 1, N)

    # The component index wraps modulo 2N (not N): with modulo N, vectors m
    # and m + N would coincide and any GALI of order > N would start at zero.
    for m_idx in range(M):
        Y0[:, dof * (1 + m_idx) + (m_idx % dof)] = 1e-5
    return Y0

In [4]:
"""
Function to integrate trajectories on the GPU
"""
def integrate_trajectories_adaptive_variational(
    ics, solver_kernel, params_tuple, num_vars,
    t_final, tol, dt_initial, max_steps, renorm_interval
):
    """
    Integrates multiple trajectories (including base state and deviation vectors)
    in parallel on the GPU using a provided pre-compiled *adaptive-step variational*
    solver kernel with optional simple renormalization.

    One GPU thread is launched per initial condition (256 threads per block).
    Integration always starts at t = 0.

    Args:
        ics (np.ndarray): Initial conditions, shape (n_ics, num_vars).
        solver_kernel: Kernel from create_solver_kernel_adaptive_variational(...).
        params_tuple (tuple): Parameters for the ODE function.
        num_vars (int): Total number of variables.
        t_final (float): Target final integration time.
        tol (float): Tolerance for adaptive step control.
        dt_initial (float): Initial time step guess.
        max_steps (int): Max adaptive steps allowed.
        renorm_interval (int): Renormalize dev vecs every N accepted steps (<= 0 disables).

    Returns:
        np.ndarray: Final states, shape (n_ics, num_vars). An empty batch
        (n_ics == 0) returns an empty array without touching the GPU.

    Raises:
        ValueError: If ``ics.shape[1]`` does not match ``num_vars``.
    """
    n_ics = ics.shape[0]
    if ics.shape[1] != num_vars:
        raise ValueError(f"ICS shape[1] {ics.shape[1]} != num_vars {num_vars}")
    if n_ics == 0:
        # An empty batch would request a launch grid of zero blocks and then
        # divide by zero in the per-IC average below; there is nothing to do.
        print("No initial conditions supplied; skipping GPU integration.")
        return np.empty((0, num_vars), dtype=ics.dtype)

    print(f"Starting adaptive variational integration for {n_ics} trajectories...")
    # perf_counter is monotonic, which makes it the right clock for intervals.
    start_time = time.perf_counter()

    print("  Transferring ICs to GPU...")
    Y0_device = cuda.to_device(ics)
    print(f"  Allocating output on GPU ({n_ics}x{num_vars})...")
    Y_out_device = cuda.device_array_like(Y0_device)

    # One thread per trajectory; round the block count up to cover all ICs.
    blockdim = 256
    griddim = (n_ics + blockdim - 1) // blockdim
    print(f"  CUDA Launch Config: Grid={griddim}, Block={blockdim}")

    t0 = 0.0
    print(f"  Launching kernel (tol={tol}, dt_init={dt_initial}, max_steps={max_steps}, renorm={renorm_interval})...")

    # --- KERNEL CALL ---
    # This matches the signature of the kernel from create_solver_kernel_adaptive_variational
    kernel_start_time = time.perf_counter()
    solver_kernel[griddim, blockdim](
        Y0_device, t0, t_final, params_tuple, tol, dt_initial, max_steps,
        renorm_interval,
        Y_out_device
    )
    # Kernel launches are asynchronous: wait for completion before timing.
    cuda.synchronize()
    kernel_end_time = time.perf_counter()
    # Measured from the launch itself, so transfer/allocation time is excluded.
    print(f"  Kernel finished in {kernel_end_time - kernel_start_time:.4f} s.")

    print("  Copying results back to host...")
    final_states = Y_out_device.copy_to_host()
    copy_end_time = time.perf_counter()
    print(f"  Data copy finished in {copy_end_time - kernel_end_time:.4f} s.")

    total_time = time.perf_counter() - start_time
    print(f"Total integration function time: {total_time:.4f} s.")
    print(f"Avg integration time per IC: {total_time / n_ics:.6f} s.")

    return final_states

In [None]:
"""
Define the main function
"""
def main():
    """Run the FPU GALI example end to end.

    Steps:
      1. Generate random initial conditions with M seeded deviation vectors.
      2. Build the adaptive variational solver kernel and integrate all
         trajectories on the GPU up to ``t_final``.
      3. Check energy conservation between initial and final states.
      4. Compute GALI_M for every trajectory from its final deviation vectors.
      5. Optionally (``output = True``) save GALI, LD, auxiliary info and ICs.

    Raises:
        ValueError: If N exceeds the compile-time bound N_MAX used to size the
            device scratch arrays, or if M is outside [0, 2*N].
    """

    # --- Simulation & System Parameters ---
    N = 16       # Number of particles (example)
    M = 4       # Number of deviation vectors (GALI order, M <= 2*N) (example)
    k_param = 2.0  # FPU parameter (example)
    a_param = 3.0  # FPU parameter (example)
    b_param = 4.0  # FPU parameter (example, alpha-FPU if b=0)
    num_ics = 1000 # number of initial conditions

    # The ODE function stores q, p and the Hessian in local arrays of fixed
    # size N_MAX; a larger N would write past them on the GPU without any error.
    if not 1 <= N <= N_MAX:
        raise ValueError(f"N={N} must satisfy 1 <= N <= N_MAX (N_MAX={N_MAX}).")
    # More than 2*N deviation vectors cannot be linearly independent.
    if not 0 <= M <= 2 * N:
        raise ValueError(f"M={M} must satisfy 0 <= M <= 2*N (2*N={2 * N}).")

    params_tuple = (N, M, k_param, a_param, b_param) # Bundle FPU parameters

    # Define system structure
    NUM_BASE_VARS_FPU = 2 * N
    NUM_DEV_VECTORS_FPU = M
    NUM_AUX_VARS_FPU = 1 # LD
    # Total variables including LD slot, but NOT LE sum slots yet
    NUM_TOTAL_VARS_FPU = NUM_BASE_VARS_FPU * (1 + NUM_DEV_VECTORS_FPU) + NUM_AUX_VARS_FPU

    # --- Integration Parameters ---
    # Integration starts at t = 0 (set inside the integration wrapper).
    t_final = 1000.0

    # --- Adaptive Integration Parameters ---
    abs_tolerance = 1e-8
    dt_initial_guess = 0.01
    max_steps_allowed = 2000000
    # Renormalization: GALI calculation doesn't strictly require in-kernel renormalization,
    # but might be useful for stability if vectors grow very large between GALI calculations.
    # Set to 0 if GALI calculated frequently or vectors don't diverge too fast.
    renorm_interval = 10 # Renormalize every 10 accepted steps (<= 0 disables)

    output = False # Set to True to save results
    if output:
        # --- Output Configuration ---
        output_dir = f"fpu_results_N{N}_M{M}_adaptive_gali"
        gali_dir = os.path.join(output_dir, "GALI")
        aux_info_dir = os.path.join(output_dir, "Aux_Info")
        ics_dir = os.path.join(output_dir, "ICS")
        os.makedirs(gali_dir, exist_ok=True)
        os.makedirs(aux_info_dir, exist_ok=True)
        os.makedirs(ics_dir, exist_ok=True)
        output_prefix = f"fpu_adaptive_gali_N{N}_M{M}_n{num_ics}"

        print(f"--- FPU Simulation (Adaptive Step, GALI={M}) ---")
        print(f"Parameters: N={N}, M={M}, k={k_param}, a={a_param}, b={b_param}")
        print(f"Integration: t_final={t_final}, tol={abs_tolerance}, dt_init={dt_initial_guess}, max_steps={max_steps_allowed}")
        print(f"ICs: num_ics={num_ics}")
        print(f"Output Dir: {output_dir}")

    # --- Generate Initial Conditions ---
    # Use the FPU IC generator
    ics = create_initial_conditions_FPU(N, M, n_ics=num_ics, seed=42)
    if ics.shape[0] == 0:
        print("Error: No initial conditions generated. Exiting.")
        sys.exit(1)
    num_ics = ics.shape[0]

    # --- Create the Adaptive Variational Solver Kernel ---
    print("\nCreating adaptive variational solver kernel...")
    adaptive_variational_solver = create_solver_kernel_adaptive_variational(
        ode_fpu_variational_adaptive, # Pass the FPU ODE function
        num_vars=NUM_TOTAL_VARS_FPU,
        num_base_vars=NUM_BASE_VARS_FPU,
        num_dev_vectors=NUM_DEV_VECTORS_FPU
    )
    print("Kernel created.")

    # --- Integrate Trajectories ---
    print("\nIntegrating trajectories (adaptive)...")
    final_results = integrate_trajectories_adaptive_variational(
        ics=ics,
        solver_kernel=adaptive_variational_solver,
        params_tuple=params_tuple,
        num_vars=NUM_TOTAL_VARS_FPU,
        t_final=t_final,
        tol=abs_tolerance,
        dt_initial=dt_initial_guess,
        max_steps=max_steps_allowed,
        renorm_interval=renorm_interval
    )
    print("Integration finished.")

    # --- Energy Conservation Check ---
    # The Hamiltonian is a constant of motion, so its drift measures the
    # accumulated integration error.
    print("\nChecking energy conservation...")
    initial_energies = np.array([Hamiltonian(ic[:N], ic[N:NUM_BASE_VARS_FPU], N, k_param, a_param, b_param) for ic in ics])
    final_energies = np.array([Hamiltonian(state[:N], state[N:NUM_BASE_VARS_FPU], N, k_param, a_param, b_param) for state in final_results])
    energy_diff = np.abs(final_energies - initial_energies)
    max_energy_diff = np.max(energy_diff) if energy_diff.size > 0 else np.nan
    print(f"  Max absolute energy deviation from initial: {max_energy_diff:.2e}")

    # --- Compute GALI ---
    print("\nCalculating GALI...")
    # NaN marks every trajectory whose GALI could not be computed.
    gali_values = np.full(num_ics, np.nan)
    calculation_errors = 0
    # The deviation vectors occupy one contiguous slab right after [q, p].
    dev_start = NUM_BASE_VARS_FPU
    dev_stop = dev_start + NUM_DEV_VECTORS_FPU * NUM_BASE_VARS_FPU

    for i in range(num_ics):
        # Fresh C-contiguous (M, 2N) copy with one deviation vector per row,
        # so calc_GALI never operates on a view of final_results.
        dev_matrix_rows = np.array(
            final_results[i, dev_start:dev_stop], dtype=np.float64, order='C'
        ).reshape(NUM_DEV_VECTORS_FPU, NUM_BASE_VARS_FPU)

        if np.isnan(dev_matrix_rows).any():
            # The integration broke down for this trajectory.
            calculation_errors += 1
            continue

        try:
            gali_values[i] = calc_GALI(dev_matrix_rows)
        except Exception as e:
            # Deliberately broad: calc_GALI is external and one failing
            # trajectory must not abort the whole batch. The failure is
            # reported and counted, and the value stays NaN.
            print(f"ERROR calculating GALI for IC {i} (in main loop): {type(e).__name__} - {e}")
            calculation_errors += 1

    if calculation_errors > 0:
        print(f"Finished GALI calculation with {calculation_errors} errors.")
    else:
        print("GALI calculation finished.")
    print(f"GALI values (first 10): {gali_values[:10]}")

    # Extract final LD values (last slot of every state vector)
    ld_index = NUM_BASE_VARS_FPU * (1 + NUM_DEV_VECTORS_FPU)
    if ld_index < final_results.shape[1]:
        ld_values = final_results[:, ld_index]
    else:
        ld_values = np.full(num_ics, np.nan) # LD index wasn't valid

    if output:
        # --- Save Results ---
        print("\nSaving results...")
        output_file_gali = os.path.join(gali_dir, f"{output_prefix}_gali.dat")
        output_file_ld = os.path.join(gali_dir, f"{output_prefix}_ld.dat")
        output_file_aux = os.path.join(aux_info_dir, f"{output_prefix}_aux.dat")
        output_file_ics = os.path.join(ics_dir, f"{output_prefix}_ics.dat")

        # Save GALI
        np.savetxt(output_file_gali, gali_values, delimiter=',', header=f'GALI_{M}', comments='')
        print(f"  Saved GALI values to: {output_file_gali}")
        # Save LD
        np.savetxt(output_file_ld, ld_values, delimiter=',', header='LD', comments='')
        print(f"  Saved LD values to: {output_file_ld}")

        # Save Auxiliary Info (timings are not tracked here, hence the NaNs)
        results_aux = np.array([[max_energy_diff, np.nan, np.nan, num_ics, N, M, k_param, a_param, b_param, abs_tolerance, dt_initial_guess, max_steps_allowed]])
        np.savetxt(output_file_aux, results_aux, delimiter=',',
                   header='max_energy_diff,time_per_ic(NaN),total_time(NaN),num_ics,N,M,k,a,b,tolerance,dt_initial,max_steps',
                   comments='')
        print(f"  Saved auxiliary info to: {output_file_aux}")

        # Save Main Initial Conditions (full state)
        np.savetxt(output_file_ics, ics, delimiter=',')
        print(f"  Saved initial conditions (full state) to: {output_file_ics}")

    print("\n--- Simulation Complete ---")

"""
Call main function
"""
if __name__ == "__main__":
    # Everything in this example runs on the GPU, so check for CUDA first.
    if cuda.is_available():
        print(f"Found CUDA device: {cuda.get_current_device().name.decode()}")
        main()  # Run the main function
    else:
        for banner_line in (
            "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
            "!!! CUDA is not available or not detected! !!!",
            "!!! This script requires a CUDA-enabled GPU!!!",
            "!!! and correctly installed CUDA drivers   !!!",
            "!!! and Numba.                           !!!",
            "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
        ):
            print(banner_line)
        sys.exit(1)  # Exit if no CUDA