This example shows how to calculate the spectrum of Lyapunov exponents for the Fermi-Pasta-Ulam system using an adaptive step size solver.

In [1]:
# Import libraries
import numpy as np
from numba import cuda, float64
import math
import time
import sys
import os
from numpy.random import default_rng
try:
    from chaoticus import (create_solver_kernel_LE_QR)
except ImportError:
    print("Error: Could not import functions from 'chaoticus.py'.")
    print("Make sure the file exists and is in the correct path.")
    sys.exit(1)

The Fermi-Pasta-Ulam system Hamiltonian is given by:

$$
    \mathcal{H}(\boldsymbol{q}, \boldsymbol{p}, k, \alpha, \beta) = \sum_{i = 1}^{N} \dfrac{p^2_{i}}{2} + \dfrac{k}{2} \sum_{i = 0}^{N} \left( q_{i+1} - q_{i} \right)^2 + \dfrac{\alpha}{3} \sum_{i = 0}^{N} \left( q_{i+1} - q_{i} \right)^3 + \dfrac{\beta}{4} \sum_{i = 0}^{N} \left( q_{i+1} - q_{i} \right)^4
$$
where $N$ is the number of particles (DoF of the system), $q_i$ and $p_i$ are the position and momentum of the $i$-th particle, $k$ models the linear interaction, and $\alpha$ and $\beta$ are the nonlinear coefficients. The chain has fixed ends, $q_0 = q_{N+1} = 0$, so the interaction sums run over the $N + 1$ bonds $i = 0, \dots, N$.

Hamilton's equations of motion are:
$$
    \begin{cases}
        \dot{q}_i &= \frac{\partial \mathcal{H}}{\partial p_i} = p_i \\[.2cm]
        \dot{p}_i &= - \frac{\partial \mathcal{H}}{\partial q_i} = - k \left[ \Delta q_i - \Delta q_{i+1} \right] - \alpha \left[ (\Delta q_i)^2 - (\Delta q_{i+1})^2 \right] - \beta \left[ (\Delta q_i)^3 - (\Delta q_{i+1})^3 \right]
    \end{cases}
$$
with $\Delta q_i = q_i - q_{i-1}$ and $\Delta q_{i+1} = q_{i+1} - q_{i}$.

For this system, the variational equations are:

$$
    \begin{cases}
        \dot{\delta q}_i &= \delta p_i \\
        \dot{\delta p}_i &= - \sum_{j = 1}^{N} \dfrac{\partial^{2} \mathcal{H}}{\partial q_i \partial q_j} \delta q_j
    \end{cases}
$$
and three possible cases for the second partial derivative of the Hamiltonian are:

$$
    \begin{cases}
        \dfrac{\partial^2 \mathcal{H}}{\partial q^2_{i}} &= 2k + 2\alpha \left( \Delta q_i + \Delta q_{i+1} \right) + 3\beta \left[ \left( \Delta q_i \right)^2 + \left( \Delta q_{i+1} \right)^2 \right] \\[.5cm]
        \dfrac{\partial^2 \mathcal{H}}{\partial q_{i} \partial q_{i-1}} &= - \left[ k + 2\alpha \Delta q_i + 3\beta \left( \Delta q_i \right)^2 \right] \\[.5cm]
        \dfrac{\partial^2 \mathcal{H}}{\partial q_{i} \partial q_{i+1}} &= - \left[ k + 2\alpha \Delta q_{i+1} + 3\beta \left( \Delta q_{i+1} \right)^2 \right]
    \end{cases}
$$

In order to compute the spectrum of Lyapunov exponents, the matrix whose columns are the deviation vectors is computed in every integration step. Then, the QR decomposition of this matrix is calculated in order to compute each Lyapunov exponent:

$$
    \lambda_i = \frac{1}{t} \sum_{k = 0}^{N_{steps}}\ln \left( |R^{(k)}_{ii}(t)| \right)
$$
where $R^{(k)}_{ii}$ is the diagonal element of the upper triangular matrix $R$ obtained from the QR decomposition at integration step $k$ and $t$ is the total time of integration.



In [2]:
"""
Define the Hamiltonian function
"""
def Hamiltonian(q, p, N, k, a, b):
    """Return the total energy of an FPU chain with both ends held at zero.

    Args:
        q: Positions of the N particles (indexable, length >= N).
        p: Momenta of the N particles (indexable, length >= N).
        N: Number of particles.
        k: Linear (harmonic) coupling constant.
        a: Cubic nonlinearity coefficient.
        b: Quartic nonlinearity coefficient.

    Returns:
        Kinetic energy plus the potential energy of the N + 1 bonds,
        where the wall on each side of the chain sits at position 0.
    """
    def bond_energy(stretch):
        # Potential stored in a single bond with elongation `stretch`.
        return 0.5 * k * stretch**2 + (a/3.0) * stretch**3 + (b/4.0) * stretch**4

    kinetic = 0.0
    potential = 0.0
    left_neighbour = 0.0  # fixed wall to the left of the first particle
    for site in range(N):
        kinetic += 0.5 * p[site]**2
        potential += bond_energy(q[site] - left_neighbour)
        left_neighbour = q[site]

    # Last bond: from the final particle to the fixed wall on the right.
    potential += bond_energy(0.0 - left_neighbour)
    return kinetic + potential

"""
Define auxiliary functions for intermediate calculations
"""
@cuda.jit(device=True, inline=True)
def partial_diag(qi, qi_m1, qi_p1, k, a, b):
    """Diagonal second derivative d^2H/dq_i^2 for particle i.

    Args:
        qi: Position of particle i.
        qi_m1: Position of the left neighbour (i - 1).
        qi_p1: Position of the right neighbour (i + 1).
        k, a, b: Linear, cubic and quartic coupling coefficients.
    """
    # Elongations of the two bonds attached to particle i.
    back = qi - qi_m1
    forward = qi_p1 - qi

    harmonic = 2.0 * k
    cubic = 2.0 * a * (back + forward)
    quartic = 3.0 * b * (back * back + forward * forward)
    return harmonic + cubic + quartic

@cuda.jit(device=True, inline=True)
def partial_non_diag(qi, qj, k, a, b):
    """Off-diagonal second derivative for the bond with elongation qi - qj.

    Args:
        qi: Position at the "upper" end of the bond.
        qj: Position at the "lower" end of the bond.
        k, a, b: Linear, cubic and quartic coupling coefficients.

    Returns:
        -(k + 2*a*d + 3*b*d^2) with d = qi - qj. The result depends on the
        sign of d through the cubic coefficient, so argument order matters.
    """
    stretch = qi - qj
    stiffness = k + 2.0 * a * stretch + 3.0 * b * stretch * stretch
    return -stiffness

@cuda.jit(device=True, inline=True)
def p_i_dot(qi, qi_m1, qi_p1, k, a, b):
    """Force on particle i, i.e. dp_i/dt = -dH/dq_i.

    Args:
        qi: Position of particle i.
        qi_m1: Position of the left neighbour (i - 1).
        qi_p1: Position of the right neighbour (i + 1).
        k, a, b: Linear, cubic and quartic coupling coefficients.
    """
    back = qi - qi_m1      # elongation of the bond to the left
    forward = qi_p1 - qi   # elongation of the bond to the right

    linear = k * (back - forward)
    quadratic = a * (back * back - forward * forward)
    cubic = b * (back * back * back - forward * forward * forward)
    return -linear - quadratic - cubic

@cuda.jit(device=True, inline=True)
def build_hessian(q, N, k, a, b, H_mat):
    """Fill the tridiagonal part of the potential-energy Hessian.

    H_mat is a flat, row-major N x N buffer: element (i, j) lives at
    H_mat[i * N + j]. Only the diagonal and the first sub/super-diagonal
    are written; every other entry is left untouched, so the caller is
    responsible for zeroing the buffer beforehand.

    Args:
        q: Particle positions (at least N entries).
        N: Number of particles.
        k, a, b: Linear, cubic and quartic coupling coefficients.
        H_mat: Output buffer with at least N * N entries.
    """
    for i in range(N):
        qi = q[i]
        # Fixed ends: the neighbours outside the chain sit at position 0.
        qi_m1 = q[i - 1] if i > 0 else 0.0
        qi_p1 = q[i + 1] if i < N - 1 else 0.0

        H_mat[i * N + i] = partial_diag(qi, qi_m1, qi_p1, k, a, b)

        # Each interior bond (i-1, i) is written exactly once, from its upper
        # end, so the elongation passed to partial_non_diag is always
        # q_i - q_{i-1}. Writing it again from the lower end with the
        # arguments swapped would flip the sign of the cubic term.
        if i > 0:
            off_val = partial_non_diag(qi, qi_m1, k, a, b)
            H_mat[i * N + (i - 1)] = off_val
            H_mat[(i - 1) * N + i] = off_val

"""
Define Hamilton's equations of motion and variational equations
"""
N_MAX = 5  # Upper bound on the number of particles (must be a global constant: it sizes the local arrays)
DIM_MATRIX = N_MAX * N_MAX  # Flat length of an N_MAX x N_MAX matrix
@cuda.jit(device=True, inline=True)
def ode_fpu_variational_adaptive(t, Y, dYdt, params):
    """Right-hand side of the FPU equations, their variational equations and LD.

    Layout of Y and dYdt as used here (2*N*(1+M) + 1 entries):
        [q(N), p(N), dq_0(N), dp_0(N), ..., dq_{M-1}(N), dp_{M-1}(N), LD]
    Any entries beyond the LD slot are neither read nor written.

    Args:
        t (float): Current time (unused; the system is autonomous).
        Y (array): Current state vector.
        dYdt (array): Output array receiving the derivatives.
        params (tuple): (N, M, k, a, b) with N particles, M deviation
            vectors and the three coupling coefficients. N must not
            exceed N_MAX because the scratch arrays are sized with it.
    """
    N, M, k, a, b = params
    block_len = 2 * N  # length of one (q, p) block

    # Scratch storage, allocated at the maximum size; only the first N
    # (or N * N) entries are used.
    pos = cuda.local.array(N_MAX, float64)
    mom = cuda.local.array(N_MAX, float64)
    hess = cuda.local.array(DIM_MATRIX, dtype=float64)
    dev_q = cuda.local.array(N_MAX, float64)
    dev_p = cuda.local.array(N_MAX, float64)

    # --- Snapshot of the base variables ---
    for site in range(N):
        pos[site] = Y[site]
        mom[site] = Y[N + site]

    # --- Hamilton's equations: dq/dt = p, dp/dt = F(q) ---
    for site in range(N):
        # Fixed ends: neighbours outside the chain are at position 0.
        left = 0.0
        if site > 0:
            left = pos[site - 1]
        right = 0.0
        if site < N - 1:
            right = pos[site + 1]
        dYdt[site] = mom[site]
        dYdt[N + site] = p_i_dot(pos[site], left, right, k, a, b)

    # --- LD integrand: sum of sqrt(|derivative|) over the 2N base variables ---
    ld_slot = block_len * (1 + M)
    ld_rate = 0.0
    for comp in range(block_len):
        ld_rate += math.sqrt(abs(dYdt[comp]) + 1e-16)  # epsilon keeps the argument positive
    dYdt[ld_slot] = ld_rate

    # --- Hessian of the potential at the current positions ---
    # build_hessian only writes the tridiagonal band, so clear the N*N part first.
    for cell in range(N * N):
        hess[cell] = 0.0
    build_hessian(pos, N, k, a, b, hess)

    # --- Variational equations, one deviation vector at a time ---
    for vec in range(M):
        offset = block_len + vec * block_len

        # Local copy of this deviation vector.
        for site in range(N):
            dev_q[site] = Y[offset + site]
            dev_p[site] = Y[offset + N + site]

        # d(dq)/dt = dp
        for site in range(N):
            dYdt[offset + site] = dev_p[site]

        # d(dp)/dt = -H . dq, with H stored row-major using stride N
        for row in range(N):
            row_start = row * N
            acc = 0.0
            for col in range(N):
                acc += hess[row_start + col] * dev_q[col]
            dYdt[offset + N + row] = -acc
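For this system, the initial conditions can be generated using a Monte Carlo (MC) approach. Note that in the code below the random sampling lines are commented out, so every initial condition is set to $q_i = p_i = 0.1$.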

In [3]:
"""
Define the function to generate initial conditions
"""
def create_initial_conditions(N, M, n_ics=1, seed=12345):
    """Build the initial state array for FPU runs with M deviation vectors.

    Each row is laid out as
        [q(N), p(N), dev_0(2N), ..., dev_{M-1}(2N), LD, LE_sum_0 .. LE_sum_{M-1}]
    All positions and momenta are set to 0.1, so every row is identical.
    Deviation vector m has a single non-zero component (value 1e-5) at
    position m % (2N) of its block. The LD slot and the LE-sum slots
    start at zero.

    Args:
        N (int): Number of particles.
        M (int): Number of deviation vectors / LEs to calculate.
        n_ics (int): Number of initial conditions to generate. Defaults to 1.
        seed (int): Seed used to construct a random generator. The generator
            is currently not drawn from, so the seed does not affect the
            returned values. Defaults to 12345.

    Returns:
        np.ndarray: Array of initial conditions, shape (n_ics, total_vars)
                    with total_vars = 2*N*(1+M) + 1 + M.
    """
    # Kept so that random sampling of q and p (e.g. rng.uniform(-0.5, 0.5, N))
    # can be enabled without touching the signature.
    rng = default_rng(seed)

    phase_dim = 2 * N                    # size of one (q, p) block
    ld_slot = phase_dim * (1 + M)        # index of the LD accumulator
    total_vars = ld_slot + 1 + M         # LD slot plus M LE-sum slots at the end

    Y0 = np.zeros((n_ics, total_vars), dtype=np.float64)

    print(f"Generating {n_ics} ICs for FPU N={N}, M={M} (State size: {total_vars})...")
    if n_ics > 0:
        # Base variables: identical for every initial condition.
        Y0[:, :phase_dim] = 0.1

        # One seeded component per deviation vector, cycling through the
        # 2N standard-basis directions.
        # NOTE(review): the vectors start with norm 1e-5 rather than 1 --
        # confirm the solver normalises them before accumulating log|R_jj|.
        seeded_cols = np.array(
            [phase_dim * (1 + m) + m % phase_dim for m in range(M)],
            dtype=np.intp,
        )
        Y0[:, seeded_cols] = 1e-5

    print("IC generation complete.")
    return Y0

In [4]:
"""
Function to integrate trajectories on the GPU
"""
def integrate_trajectories_adaptive_LE_QR(
    ics, solver_kernel, params_tuple, num_vars,
    t_final, tol, dt_initial, max_steps, qr_interval
):
    """
    Integrates multiple trajectories (base + dev vecs + aux + LE sums)
    in parallel on the GPU using a provided pre-compiled adaptive-step kernel
    that performs periodic QR decomposition for Lyapunov Exponent sum calculation.

    Args:
        ics (np.ndarray): Initial conditions, shape (n_ics, num_vars).
                          *MUST* include slots for LE sums, initialized (e.g., to 0).
        solver_kernel: Kernel function from create_solver_kernel_LE_QR(...).
        params_tuple (tuple): Parameters for the ODE function.
        num_vars (int): Total number of variables (incl. LE sums).
        t_final (float): Target final integration time.
        tol (float): Tolerance for the adaptive integrator.
        dt_initial (float): Initial time step guess.
        max_steps (int): Max adaptive steps allowed.
        qr_interval (int): Perform QR decomp/LE accumulation every N *accepted* steps.
                           Set <= 0 to disable.

    Returns:
        np.ndarray: Final states array, shape (n_ics, num_vars). Contains state
                    at the last accepted step time, including the *accumulated*
                    log|R_jj| sums for LE calculation in the final slots.
                    Final division by time must be done on the host.

    Raises:
        ValueError: If ics.shape[1] differs from num_vars, or if ics has no rows.
    """
    n_ics = ics.shape[0]
    if ics.shape[1] != num_vars:
        raise ValueError(f"ICS shape[1] {ics.shape[1]} != num_vars {num_vars}")
    if n_ics == 0:
        # A zero-size grid cannot be launched and the per-IC average below
        # would divide by zero, so fail early with a clear message.
        raise ValueError("ics contains no initial conditions (shape[0] == 0)")

    print(f"Starting adaptive integration with LE(QR) for {n_ics} trajectories up to t={t_final}...")
    start_time = time.perf_counter()

    # --- Prepare GPU Data ---
    print("  Transferring initial conditions to GPU...")
    Y0_device = cuda.to_device(ics)
    print(f"  Allocating output on GPU ({n_ics}x{num_vars})...")
    Y_out_device = cuda.device_array_like(Y0_device)

    # --- Configure CUDA Launch ---
    blockdim = 128  # threads per block; one thread per initial condition
    griddim = (n_ics + blockdim - 1) // blockdim  # ceil(n_ics / blockdim)
    print(f"  CUDA Launch Config: Grid={griddim}, Block={blockdim}")

    # --- Execute Adaptive LE QR Kernel ---
    t0 = 0.0  # Initial time
    print(f"  Launching LE QR kernel (tol={tol}, dt_init={dt_initial}, max_steps={max_steps}, qr_interval={qr_interval})...")

    # Time the launch on its own so the reported kernel time excludes the
    # host-to-device transfer and allocation above. (On the first call it
    # may still include Numba's JIT compilation of the kernel.)
    kernel_start_time = time.perf_counter()
    solver_kernel[griddim, blockdim](
        Y0_device,      # Initial states (incl. LE sum slots initialized to 0)
        t0,             # Initial time
        t_final,        # Target final time
        params_tuple,   # Parameters tuple for the ODE function
        tol,            # Error tolerance
        dt_initial,     # Initial dt guess
        max_steps,      # Max allowed steps
        qr_interval,    # QR interval
        Y_out_device    # Output array
    )
    cuda.synchronize()  # Wait for the kernel to complete
    kernel_end_time = time.perf_counter()
    print(f"  Kernel execution finished in {kernel_end_time - kernel_start_time:.4f} s.")

    # --- Retrieve Results ---
    print("  Copying results back to host...")
    final_states_with_sums = Y_out_device.copy_to_host()
    copy_end_time = time.perf_counter()
    print(f"  Data copy finished in {copy_end_time - kernel_end_time:.4f} s.")

    total_time = time.perf_counter() - start_time
    print(f"Total integration function time: {total_time:.4f} s.")
    print(f"Avg integration time per IC: {total_time / n_ics:.4f} s.")

    # The returned array contains the final state AND the accumulated LE sums
    return final_states_with_sums

In [None]:
"""
Define the main function
"""
def main():
    """Run the FPU Lyapunov-spectrum example end to end.

    Steps: set the parameters, generate the initial conditions, build the
    adaptive LE(QR) solver kernel, integrate all trajectories on the GPU,
    check energy conservation, turn the accumulated log|R_jj| sums into
    Lyapunov exponents and, when `output` is enabled, save the results.

    Exits via sys.exit if N exceeds N_MAX or if no initial conditions
    were generated.
    """
    # --- Parameters ---
    N = 5  # Number of particles
    M = 4  # Number of deviation vectors / LEs
    k_param = 2.0
    a_param = 3.0
    b_param = 4.0
    num_ics = 1000

    # The ODE device function sizes its local scratch arrays with N_MAX, so a
    # larger N would index past the end of those arrays on the GPU.
    if N > N_MAX:
        sys.exit(f"Error: N={N} exceeds N_MAX={N_MAX}; increase N_MAX before running.")

    params_fpu = (N, M, k_param, a_param, b_param)

    # --- Dimensions (LE QR version) ---
    NUM_BASE_VARS_FPU = 2 * N
    NUM_DEV_VECTORS_FPU = M
    NUM_AUX_VARS_FPU = 1 # LD
    NUM_LE_SUMS = NUM_DEV_VECTORS_FPU
    NUM_TOTAL_VARS_LE = NUM_BASE_VARS_FPU * (1 + NUM_DEV_VECTORS_FPU) + NUM_AUX_VARS_FPU + NUM_LE_SUMS

    # --- Integration Parameters ---
    t_final = 10000.0

    # --- Adaptive Integration & QR Parameters ---
    abs_tolerance = 1e-5
    dt_initial_guess = 0.01
    max_steps_allowed = 2000000
    qr_interval = 10 # Perform QR every 10 accepted steps

    output = False # Save results?
    if output:
        # --- Output Config ---
        output_dir = f"fpu_results_N{N}_M{M}_adaptive_LE_QR"
        le_dir = os.path.join(output_dir, "LE_Spectrum")
        aux_info_dir = os.path.join(output_dir, "Aux_Info")
        ics_dir = os.path.join(output_dir, "ICS")
        os.makedirs(le_dir, exist_ok=True)
        os.makedirs(aux_info_dir, exist_ok=True)
        os.makedirs(ics_dir, exist_ok=True)
        output_prefix = f"fpu_adaptive_LE_QR_N{N}_M{M}_n{num_ics}"

        # NOTE(review): this run summary is only printed when saving is
        # enabled -- confirm that is intended.
        print(f"--- FPU Simulation (Adaptive Step, LE Spectrum via QR, M={M}) ---")
        print(f"Parameters: N={N}, M={M}, k={k_param}, a={a_param}, b={b_param}")
        print(f"Integration: t_final={t_final}, tol={abs_tolerance}, dt_init={dt_initial_guess}, max_steps={max_steps_allowed}")
        print(f"QR Interval: {qr_interval} steps")
        print(f"ICs: num_ics={num_ics}")
        print(f"Output Dir: {output_dir}")

    # --- Generate Initial Conditions ---
    # Use IC generator that creates space for LE sums (initialized to 0)
    ics = create_initial_conditions(N, M, n_ics=num_ics, seed=42)
    if ics.shape[0] == 0:
        sys.exit("Error: No ICs generated.")
    num_ics = ics.shape[0]

    # --- Create the Adaptive LE QR Solver Kernel ---
    print("\nCreating adaptive LE QR solver kernel...")
    le_qr_solver = create_solver_kernel_LE_QR(
        ode_fpu_variational_adaptive, # ODE func should ignore LE sum slots
        num_vars=NUM_TOTAL_VARS_LE,
        num_base_vars=NUM_BASE_VARS_FPU,
        num_dev_vectors=NUM_DEV_VECTORS_FPU
    )
    print("Kernel created.")

    # --- Integrate Trajectories ---
    print("\nIntegrating trajectories (adaptive + LE QR)...")
    final_state_and_sums = integrate_trajectories_adaptive_LE_QR(
        ics=ics,
        solver_kernel=le_qr_solver,
        params_tuple=params_fpu,
        num_vars=NUM_TOTAL_VARS_LE,
        t_final=t_final,
        tol=abs_tolerance,
        dt_initial=dt_initial_guess,
        max_steps=max_steps_allowed,
        qr_interval=qr_interval
    )
    print("Integration finished.")

    # --- Energy Conservation Check ---
    print("\nChecking energy conservation...")
    initial_energies = np.array([Hamiltonian(ic[:N], ic[N:NUM_BASE_VARS_FPU], N, k_param, a_param, b_param) for ic in ics])
    final_energies = np.array([Hamiltonian(state[:N], state[N:NUM_BASE_VARS_FPU], N, k_param, a_param, b_param) for state in final_state_and_sums])
    energy_diff = np.abs(final_energies - initial_energies)
    max_energy_diff = np.max(energy_diff) if energy_diff.size > 0 else np.nan
    print(f"  Max absolute energy deviation from initial: {max_energy_diff:.2e}")
    print(f"  Mean final energy: {np.mean(final_energies):.4f} +/- {np.std(final_energies):.2e}")

    # --- Calculate Lyapunov Exponents ---
    print("\nCalculating Lyapunov Exponents...")
    # Determine actual final time (assuming kernel ran to t_final for all threads)
    # TODO: A more robust method would get actual time reached per thread if max_steps is a concern
    actual_integration_time = t_final

    # The accumulated log|R_jj| sums occupy the last M slots of each state.
    le_sum_start_index = NUM_BASE_VARS_FPU * (1 + NUM_DEV_VECTORS_FPU) + NUM_AUX_VARS_FPU
    accumulated_le_sums = final_state_and_sums[:, le_sum_start_index:]

    if actual_integration_time > 0:
        lyapunov_exponents = accumulated_le_sums / actual_integration_time
        print(f"LE calculation complete (divided sums by t_final={actual_integration_time}).")
        # Print spectrum for first few ICs
        print("\nLyapunov Spectrum (First 10 ICs):")
        for i in range(min(10, num_ics)):
            # Sort LEs in descending order for standard presentation
            sorted_les = np.sort(lyapunov_exponents[i])[::-1]
            print(f" IC {i}: {sorted_les}")
    else:
        print("Warning: Integration time is zero. Cannot calculate Lyapunov exponents.")
        lyapunov_exponents = np.full_like(accumulated_le_sums, np.nan)

    # Final LD values: the LD slot sits right before the LE-sum slots, so it
    # is always inside the state vector.
    ld_index = NUM_BASE_VARS_FPU * (1 + NUM_DEV_VECTORS_FPU)
    ld_values = final_state_and_sums[:, ld_index]

    if output:
        # --- Save Results ---
        print("\nSaving results...")
        output_file_le = os.path.join(le_dir, f"{output_prefix}_LE_spectrum.dat")
        output_file_ld = os.path.join(le_dir, f"{output_prefix}_ld.dat")
        output_file_aux = os.path.join(aux_info_dir, f"{output_prefix}_aux.dat")
        output_file_ics = os.path.join(ics_dir, f"{output_prefix}_ics.dat")

        # Save LE Spectrum (columns are in deviation-vector order, not sorted)
        header_le = ",".join([f"LE_{j}" for j in range(NUM_DEV_VECTORS_FPU)])
        np.savetxt(output_file_le, lyapunov_exponents, delimiter=',', header=header_le, comments='')
        print(f"  Saved LE spectrum to: {output_file_le}")
        # Save LD
        np.savetxt(output_file_ld, ld_values, delimiter=',', header='LD', comments='')
        print(f"  Saved LD values to: {output_file_ld}")

        # Save Auxiliary Info (timing columns are not tracked here, hence NaN)
        results_aux = np.array([[max_energy_diff, np.nan, np.nan, num_ics, N, M, k_param, a_param, b_param, abs_tolerance, dt_initial_guess, max_steps_allowed, qr_interval]])
        np.savetxt(output_file_aux, results_aux, delimiter=',',
                   header='max_energy_diff,time_per_ic(NaN),total_time(NaN),num_ics,N,M,k,a,b,tolerance,dt_initial,max_steps,qr_interval',
                   comments='')
        print(f"  Saved auxiliary info to: {output_file_aux}")

        # Save Main Initial Conditions (full state)
        np.savetxt(output_file_ics, ics, delimiter=',')
        print(f"  Saved initial conditions (full state) to: {output_file_ics}")

    print("\n--- Simulation Complete ---")



"""
Call main function
"""
if __name__ == "__main__":
    # The whole example runs on the GPU, so only start when CUDA is usable.
    if cuda.is_available():
        print(f"Found CUDA device: {cuda.get_current_device().name.decode()}")
        main() # Run the main function
    else:
        for banner_line in (
            "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
            "!!! CUDA is not available or not detected! !!!",
            "!!! This script requires a CUDA-enabled GPU!!!",
            "!!! and correctly installed CUDA drivers   !!!",
            "!!! and Numba.                           !!!",
            "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
        ):
            print(banner_line)
        sys.exit(1) # Exit if no CUDA