This notebook prepares the data for the subsequent notebook `1-Step-Analyze.ipynb`, which generates figures illustrating the one-step prediction accuracy of the wave equation with Dirichlet boundary conditions, as described in Supplementary Material Section 3.

In [None]:
import os
# Switch off the tokenizers library's internal parallelism through its environment flag
# (set here, before anything that could create a tokenizer is imported).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch

# Report every GPU visible to this process.
num_devices = torch.cuda.device_count()
print("Number of visible GPUs:", num_devices)

for i in range(num_devices):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# torch.cuda.current_device() initializes CUDA and raises when no usable GPU
# exists, so only query the active device when CUDA is actually available.
if torch.cuda.is_available():
    current_device = torch.cuda.current_device()
    print("Current device index:", current_device)
    print("Current device name:", torch.cuda.get_device_name(current_device))
else:
    current_device = None
    print("No CUDA device available; running on CPU.")

In [None]:
import random
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

from tqdm import tqdm
from llama_utils import load_model_and_tokenizer, generate_text_multiple
from data_processing import (
    SimpleSerializerSettings,
    scale_2d_array,
    unscale_2d_array,
    serialize_2d_integers,
    deserialize_2d_integers,
)
from wave_equation import (
    compute_exact_solution_random_ic_vary_Nx,
    solve_wave_leapfrog,
    solve_wave_crank_nicolson,
    visualize_spline_ic,
    plot_both_grids
)

# Checkpoint evaluated by this notebook; the commented alternatives are smaller variants.
MODEL_NAME = "meta-llama/Llama-3.1-8B"
# MODEL_NAME = "meta-llama/Llama-3.2-3B"
# MODEL_NAME = "meta-llama/Llama-3.2-1B"

# Seed every random number generator in play: Python, NumPy, PyTorch (CPU),
# and finally all CUDA devices when a GPU is present.
seed = 42
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

In [None]:
# Load the LLM and its tokenizer once; the prediction loops in later cells
# pass these two objects to generate_text_multiple.
model, tokenizer = load_model_and_tokenizer(MODEL_NAME)

In [None]:
# Example: Demonstrating the process of generating and visualizing a random initial condition
# L is the length of the spatial domain; Nx is the number of grid values drawn.
L = 2
Nx = 14
# One uniform random value in [-0.5, 0.5) per grid point.
init_cond_random = np.random.uniform(-0.5, 0.5, size=Nx)
# The helper returns the figure and `cs`, which later cells pass as `spline_obj`.
fig, cs = visualize_spline_ic(L, Nx, init_cond_random)
plt.tight_layout()
plt.show()

# Example: Demonstrating how to resample spatial points from an underlying random initial condition
Nx_original = Nx
# NOTE(review): Nx_new equals Nx_original here, so this demo presumably resamples
# onto the same grid; pick a different value to actually see both grids differ.
Nx_new = 14
# `cs` is rebound here; the next cell uses this returned object as `spline_obj`.
fig, cs, init_cond_random_new = plot_both_grids(L, Nx_original, Nx_new, init_cond_random)
plt.tight_layout()
plt.show()

In [None]:
# Define parameters for the Wave equation
L = 2       # Length of the spatial domain
c = 0.2     # Wave speed
T = 0.5     # Total simulation time
Nx = 14     # Number of spatial steps (excluding boundary points)
Nt = 25     # Number of time steps 
dx = L/(Nx+1)   # Grid spacing: Nx interior points split the domain into Nx+1 intervals
dt = T/Nt       # Time-step size

# Serialize the entire exact solution for all time steps
# (`cs` is the spline object produced by the demo cell above.)
u_exact = compute_exact_solution_random_ic_vary_Nx(L, c, T, Nx, Nt, spline_obj=cs)
# Values within a time step are separated by "," and time steps by ";".
settings = SimpleSerializerSettings(space_sep=",", time_sep=";")
# scale_2d_array also returns the (vmin, vmax) pair needed to undo the scaling.
u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)
# Reconstruct the scaled data from the text
u_exact_parsed = deserialize_2d_integers(u_exact_serialized, settings)
# Map the parsed integers back to physical values with the stored range.
u_exact_unscaled = unscale_2d_array(u_exact_parsed, vmin_exact, vmax_exact)

In [None]:
# Side-by-side contour plots of the reference solution on the coarse and fine grids
u_coarse, u_fine, x_fine, t_fine = compute_exact_solution_random_ic_vary_Nx(
    L, c, T, Nx, Nt, spline_obj=cs, return_fine=True)
Nt_plus1, Nx = u_coarse.shape

# Embed the coarse solution between two zero boundary columns so it
# spans the full interval [-L/2, L/2].
u_coarse_full = np.zeros((Nt_plus1, Nx+2))
u_coarse_full[:, 1:-1] = u_coarse
x_coarse_full = np.linspace(-L/2, L/2, Nx+2)
t_coarse = np.linspace(0, t_fine[-1], Nt_plus1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6), tight_layout=True)

# One entry per panel: (axes, time axis, space axis, field, title)
panel_specs = [
    (ax1, t_coarse, x_coarse_full, u_coarse_full, 'Reference Solution (Coarse Grid)'),
    (ax2, t_fine, x_fine, u_fine, 'Reference Solution (Fine Grid)'),
]
filled_contours = []
for axis, t_vals, x_vals, field, panel_title in panel_specs:
    contour_set = axis.contourf(t_vals, x_vals, field.T, levels=50, cmap='jet')
    axis.set_title(panel_title, fontsize=18)
    axis.set_xlabel('Time $(t)$', fontsize=18)
    axis.set_ylabel('Space $(x)$', fontsize=18)
    plt.colorbar(contour_set, ax=axis)
    filled_contours.append(contour_set)
cf1, cf2 = filled_contours
plt.show()

In [None]:
# Single explicit leapfrog step for the wave equation with zero boundary values
def one_step_leapfrog(u_nm1, u_n, c, dt, dx):
    """Advance the interior solution by one leapfrog time step.

    Parameters
    ----------
    u_nm1 : array of interior values at time level n-1
    u_n   : array of interior values at time level n
    c     : wave speed
    dt    : time-step size
    dx    : grid spacing

    Returns
    -------
    Array of interior values at time level n+1.
    """
    courant_sq = (c * dt / dx) ** 2
    # Surround the interior values with the zero (Dirichlet) boundary values.
    padded = np.concatenate(([0.0], u_n, [0.0]))
    right, center, left = padded[2:], padded[1:-1], padded[:-2]
    second_difference = right - 2*center + left
    return 2*u_n - u_nm1 + courant_sq*second_difference

# Velocity estimate from a three-point, second-order backward difference
def estimate_velocity_bdf2(u_nm2, u_nm1, u_n, dt):
    """Return (3*u_n - 4*u_nm1 + u_nm2) / (2*dt), the backward-difference
    approximation of the time derivative at the most recent level `u_n`."""
    weighted_sum = 3*u_n - 4*u_nm1 + u_nm2
    return weighted_sum / (2*dt)

# Range of Nt values to test
all_Nt_values = range(2, 41, 2)
# Per-Nt aggregates (one entry appended per Nt value by the sweep below):
# mean and sample std of the LLM errors, and mean errors of the two
# finite-difference baselines.
llm_final_max_diff = []
llm_final_rmse = []
llm_final_max_diff_std = []
llm_final_rmse_std = []
leapfrog_final_max_diff = []
leapfrog_final_rmse = []
crank_nicolson_final_max_diff = []
crank_nicolson_final_rmse = []
# Fixed parameter - number of spatial points (excluding boundary points)
Nx = 14
# Number of independent random initial conditions averaged per configuration
n_seeds = 50
# Generate all random initial conditions
# (built once here so every Nt value is evaluated on the same set).
stored_initial_conditions = []
stored_spline_objects = []
for seed in range(n_seeds):
    # Set seed for this initial condition
    # NOTE: this rebinds the module-level `seed` and leaves all global RNGs
    # seeded with the last loop value when the loop finishes.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # Generate and store the random initial condition
    init_cond_random = np.random.uniform(-0.5, 0.5, size=Nx)
    stored_initial_conditions.append(init_cond_random.copy())
    # Create and store the spline object
    fig, cs = visualize_spline_ic(L, Nx, init_cond_random)
    # Only the spline is needed; close the figure so 50 figures do not pile up.
    plt.close(fig)
    stored_spline_objects.append(cs)
# Array copy of the initial conditions, saved to disk by a later cell.
stored_initial_conditions_array = np.array(stored_initial_conditions)

# Temporal sweep: for every Nt, predict the final time step from all earlier
# steps with (a) the LLM, (b) one leapfrog step and (c) one Crank-Nicolson step,
# and average the errors against the quantized exact final step over all seeds.
for Nt in tqdm(all_Nt_values):
    dt = T / Nt
    dx = L / (Nx + 1)
    # Per-seed errors for this Nt
    seed_max_diffs_llm = []
    seed_rmses_llm = []
    seed_max_diffs_leapfrog = []
    seed_rmses_leapfrog = []
    seed_max_diffs_crank_nicolson = []
    seed_rmses_crank_nicolson = []
    for seed in range(n_seeds):
        # Use the stored initial condition and spline
        init_cond_random = stored_initial_conditions[seed]
        cs = stored_spline_objects[seed]
        # Compute exact solution for this initial condition
        u_exact = compute_exact_solution_random_ic_vary_Nx(L, c, T, Nx, Nt, spline_obj=cs)
        # Scale and serialize the exact solution
        settings = SimpleSerializerSettings(space_sep=",", time_sep=";")
        u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
        u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)
        # Extract ground truth at final time step
        final_exact = u_exact[Nt]
        # Round-trip the final row through scale/unscale with the global range so the
        # reference presumably carries the same quantization as the serialized data.
        quantized_gt_2d, _, _ = scale_2d_array(final_exact[np.newaxis, :], vmin_exact, vmax_exact)
        quantized_gt_2d = unscale_2d_array(quantized_gt_2d, vmin_exact, vmax_exact)
        quantized_ground_truth = quantized_gt_2d[0, :]
        # Extract input data
        # (all serialized time steps except the last, terminated by a time separator)
        rows = [row.strip() for row in u_exact_serialized.split(settings.time_sep) if row.strip()]
        train_rows = rows[:-1]
        train_serial = settings.time_sep.join(train_rows) + settings.time_sep
        # LLM prediction
        # NOTE(review): unlike the spatial sweep later in this notebook, the parsed
        # output shape is not validated or retried here; a malformed generation
        # would surface as an error in the subtraction below.
        next_token, _ = generate_text_multiple(train_serial, model, tokenizer, Nx)
        parsed_data = deserialize_2d_integers(next_token, settings)
        unscaled_data = unscale_2d_array(parsed_data, vmin_exact, vmax_exact)
        # Calculate LLM errors
        max_diff = np.max(np.abs(unscaled_data - quantized_ground_truth))
        rmse = np.sqrt(np.mean((unscaled_data - quantized_ground_truth)**2))
        seed_max_diffs_llm.append(max_diff)
        seed_rmses_llm.append(rmse)
        # For wave equation, we need two previous time steps
        if Nt >= 2:
            penultimate_exact = u_exact[Nt-1]
            antepenultimate_exact = u_exact[Nt-2]
            # Apply the same scale/unscale round trip to the solver inputs so the
            # finite-difference baselines start from the same degraded data.
            penultimate_2d = penultimate_exact[np.newaxis, :]
            scaled_pen_2d, _, _ = scale_2d_array(penultimate_2d, vmin_exact, vmax_exact)
            penultimate_degraded = unscale_2d_array(scaled_pen_2d, vmin_exact, vmax_exact)[0]
            antepenultimate_2d = antepenultimate_exact[np.newaxis, :]
            scaled_ante_2d, _, _ = scale_2d_array(antepenultimate_2d, vmin_exact, vmax_exact)
            antepenultimate_degraded = unscale_2d_array(scaled_ante_2d, vmin_exact, vmax_exact)[0]
            predicted_leapfrog = one_step_leapfrog(antepenultimate_degraded, penultimate_degraded, c, dt, dx)
            # For Crank-Nicolson, estimate velocity with proper accuracy
            if Nt >= 3:
                # Second-order velocity estimate
                # (backward difference evaluated at the penultimate step)
                pre_ante_exact = u_exact[Nt-3]
                pre_ante_2d = pre_ante_exact[np.newaxis, :]
                scaled_pre_ante_2d, _, _ = scale_2d_array(pre_ante_2d, vmin_exact, vmax_exact)
                pre_ante_degraded = unscale_2d_array(scaled_pre_ante_2d, vmin_exact, vmax_exact)[0]
                initial_vel = estimate_velocity_bdf2(pre_ante_degraded,
                                                     antepenultimate_degraded,
                                                     penultimate_degraded, dt)
            else:
                # Fall back to first-order estimate if not enough history
                initial_vel = (penultimate_degraded - antepenultimate_degraded) / dt
            # Crank-Nicolson prediction
            # (a single step of size dt started from the penultimate state)
            _, crank_nicolson_result, _ = solve_wave_crank_nicolson(L, c, dt, Nx, 1,
                                                                    init_disp=penultimate_degraded,
                                                                    init_vel=initial_vel)
            # Index 1 is taken as the state after the step — presumably index 0 holds
            # the initial displacement; TODO confirm against solve_wave_crank_nicolson.
            predicted_crank_nicolson = crank_nicolson_result[1]
        else:
            # Not reached with the configured all_Nt_values (smallest Nt is 2);
            # would report zero error for both baselines.
            predicted_leapfrog = quantized_ground_truth
            predicted_crank_nicolson = quantized_ground_truth
        # Calculate FD errors
        max_diff_leapfrog = np.max(np.abs(predicted_leapfrog - quantized_ground_truth))
        rmse_leapfrog = np.sqrt(np.mean((predicted_leapfrog - quantized_ground_truth)**2))
        seed_max_diffs_leapfrog.append(max_diff_leapfrog)
        seed_rmses_leapfrog.append(rmse_leapfrog)
        max_diff_crank_nicolson = np.max(np.abs(predicted_crank_nicolson - quantized_ground_truth))
        rmse_crank_nicolson = np.sqrt(np.mean((predicted_crank_nicolson - quantized_ground_truth)**2))
        seed_max_diffs_crank_nicolson.append(max_diff_crank_nicolson)
        seed_rmses_crank_nicolson.append(rmse_crank_nicolson)
    # Aggregate over seeds: means for every method, sample std (ddof=1) for the LLM only.
    llm_final_max_diff.append(np.mean(seed_max_diffs_llm))
    llm_final_rmse.append(np.mean(seed_rmses_llm))
    llm_final_max_diff_std.append(np.std(seed_max_diffs_llm, ddof=1))
    llm_final_rmse_std.append(np.std(seed_rmses_llm, ddof=1))
    leapfrog_final_max_diff.append(np.mean(seed_max_diffs_leapfrog))
    leapfrog_final_rmse.append(np.mean(seed_rmses_leapfrog))
    crank_nicolson_final_max_diff.append(np.mean(seed_max_diffs_crank_nicolson))
    crank_nicolson_final_rmse.append(np.mean(seed_rmses_crank_nicolson))

# Compute quant floor for temporal experiment using stored initial conditions
# The "floor" is the error introduced by the scale -> serialize -> deserialize ->
# unscale round trip alone, measured against the unquantized exact solution.
temporal_baseline_max_errors = []
temporal_baseline_rmse_errors = []
for Nt in all_Nt_values:
    seed_baseline_max_errors = []
    seed_baseline_rmse_errors = []
    for seed in range(n_seeds):
        init_cond_random = stored_initial_conditions[seed]
        cs = stored_spline_objects[seed]
        # Compute exact solution for this specific (Nx, Nt) pair and initial condition
        u_exact = compute_exact_solution_random_ic_vary_Nx(L, c, T, Nx, Nt, spline_obj=cs)
        # Quantization pipeline
        # (`settings` is the serializer configuration left over from the sweep above)
        u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
        u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)
        u_exact_parsed = deserialize_2d_integers(u_exact_serialized, settings)
        u_exact_unscaled = unscale_2d_array(u_exact_parsed, vmin_exact, vmax_exact)
        # Compute baseline errors for this seed
        # (taken over the whole space-time array, not only the final step)
        baseline_max_error = np.max(np.abs(u_exact - u_exact_unscaled))
        baseline_rmse_error = np.sqrt(np.mean((u_exact - u_exact_unscaled)**2))
        seed_baseline_max_errors.append(baseline_max_error)
        seed_baseline_rmse_errors.append(baseline_rmse_error)
    # Average across seeds
    temporal_baseline_max_errors.append(np.mean(seed_baseline_max_errors))
    temporal_baseline_rmse_errors.append(np.mean(seed_baseline_rmse_errors))
# Convert to arrays for saving
temporal_baseline_max_errors = np.array(temporal_baseline_max_errors)
temporal_baseline_rmse_errors = np.array(temporal_baseline_rmse_errors)

In [None]:
def log_ci(mean, std, n, tcrit):
    """
    Confidence interval suited to a log10 axis, obtained with the delta method.

    mean  : arithmetic mean of the n samples (scalar or array)
    std   : sample standard deviation of the n samples
    n     : number of samples
    tcrit : two-sided t critical value (e.g. the 0.975 quantile for a 95% CI)

    Returns the pair (lower, upper) expressed back in linear units.
    """
    log_center = np.log10(mean)
    # Standard error in linear space, propagated through log10 via
    # d/dx log10(x) = 1 / (x * ln 10).
    log_se = (std / np.sqrt(n)) / (mean * np.log(10))
    log_half_width = tcrit * log_se
    lower = 10**(log_center - log_half_width)
    upper = 10**(log_center + log_half_width)
    return lower, upper

# Convert the per-Nt LLM aggregates to arrays so log_ci can operate element-wise.
llm_final_max_diff = np.array(llm_final_max_diff)
llm_final_rmse = np.array(llm_final_rmse)
llm_final_max_diff_std = np.array(llm_final_max_diff_std)
llm_final_rmse_std = np.array(llm_final_rmse_std)
# Calculate log-scale confidence intervals
# Two-sided 95% critical value of Student's t with n_seeds-1 degrees of freedom.
# NOTE(review): no minimum is enforced on the means here (the spatial cell clamps
# them with an epsilon); a mean of exactly zero would give non-finite bounds.
t_critical = stats.t.ppf(0.975, df=n_seeds-1)
lower_max_diff_log, upper_max_diff_log = log_ci(llm_final_max_diff, llm_final_max_diff_std, n_seeds, t_critical)
lower_rmse_log, upper_rmse_log = log_ci(llm_final_rmse, llm_final_rmse_std, n_seeds, t_critical)

In [None]:
# Persist every result of the temporal sweep in one compressed archive.
# NOTE(review): the finite-difference keys below carry no "_8B" suffix, whereas the
# spatial archive written later in this notebook uses "_8B" on the same metrics —
# confirm which spelling the analysis notebook expects before renaming anything.
np.savez_compressed(
    "8B_1_step_time_discretization.npz",
    # LLM metrics
    llm_final_max_diff_8B = llm_final_max_diff,
    llm_final_rmse_8B = llm_final_rmse,
    llm_final_max_diff_std_8B = llm_final_max_diff_std,
    llm_final_rmse_std_8B = llm_final_rmse_std,
    # Log-scale confidence intervals
    llm_lower_max_diff_log_8B = lower_max_diff_log,
    llm_upper_max_diff_log_8B = upper_max_diff_log,
    llm_lower_rmse_log_8B = lower_rmse_log,
    llm_upper_rmse_log_8B = upper_rmse_log,
    # Finite difference metrics
    leapfrog_final_max_diff = leapfrog_final_max_diff,
    leapfrog_final_rmse = leapfrog_final_rmse,
    crank_nicolson_final_max_diff = crank_nicolson_final_max_diff,
    crank_nicolson_final_rmse = crank_nicolson_final_rmse,
    # Baseline metrics
    temporal_baseline_max_errors=temporal_baseline_max_errors,
    temporal_baseline_rmse_errors=temporal_baseline_rmse_errors,
    # Experiment metadata: the random initial conditions, the swept Nt values,
    # the number of seeds and the t critical value used for the intervals
    initial_conditions = stored_initial_conditions_array,
    all_Nt_values = list(all_Nt_values),
    n_seeds = n_seeds,
    t_critical = t_critical
)

In [None]:
# Spatial sweep: for every Nx, predict the final time step from all earlier steps
# with (a) the LLM, (b) one leapfrog step and (c) one Crank-Nicolson step, and
# average the errors against the quantized exact final step over all seeds.
# Range of Nx values to test
all_Nx_values = range(2, 41, 2)
# Per-Nx aggregates (one entry appended per Nx value)
llm_final_max_diff = []
llm_final_rmse = []
llm_final_max_diff_std = []
llm_final_rmse_std = []
leapfrog_final_max_diff = []
leapfrog_final_rmse = []
crank_nicolson_final_max_diff = []
crank_nicolson_final_rmse = []

# Fixed parameters
Nt = 50       # Fixed number of time steps
Nx_base = 14  # Base resolution for generating initial conditions
# Loop-invariant configuration, created once instead of once per seed
settings = SimpleSerializerSettings(space_sep=",", time_sep=";")
max_retries = 10  # LLM generation attempts per (Nx, seed) before falling back
for Nx in tqdm(all_Nx_values):
    dt = T / Nt
    dx = L / (Nx + 1)
    # Collect errors for all initial conditions
    seed_max_diffs_llm = []
    seed_rmses_llm = []
    seed_max_diffs_leapfrog = []
    seed_rmses_leapfrog = []
    seed_max_diffs_crank_nicolson = []
    seed_rmses_crank_nicolson = []
    # Variables to store previous valid results as fallback
    prev_max_diff = None
    prev_rmse = None
    for seed in range(n_seeds):
        # Use stored base initial condition and spline from temporal exploration
        init_cond_random_base = stored_initial_conditions[seed]
        cs = stored_spline_objects[seed]
        # NOTE(review): init_cond_current is not consumed below; the exact solution is
        # evaluated from the stored spline `cs`. Kept to preserve the original calls.
        if Nx == Nx_base:
            init_cond_current = init_cond_random_base
        else:
            # Sample from the spline at new resolution
            fig2, cs_same, init_cond_current = plot_both_grids(L, Nx_base, Nx, init_cond_random_base)
            plt.close(fig2)
        # Compute exact solution for this Nx from the stored spline
        u_exact = compute_exact_solution_random_ic_vary_Nx(L, c, T, Nx, Nt, spline_obj=cs)
        # Scale and serialize the exact solution
        u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
        u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)
        # Extract ground truth at final time step, round-tripped through the same
        # scale/unscale range as the serialized data
        final_exact = u_exact[Nt]
        quantized_gt_2d, _, _ = scale_2d_array(final_exact[np.newaxis, :], vmin_exact, vmax_exact)
        quantized_gt_2d = unscale_2d_array(quantized_gt_2d, vmin_exact, vmax_exact)
        quantized_ground_truth = quantized_gt_2d[0, :]
        # Extract input data: every serialized time step except the last
        rows = [row.strip() for row in u_exact_serialized.split(settings.time_sep) if row.strip()]
        train_rows = rows[:-1]
        train_serial = settings.time_sep.join(train_rows) + settings.time_sep
        # LLM prediction, retried until the parsed row has exactly Nx values
        valid_prediction = False
        for attempt in range(max_retries):
            next_token, _ = generate_text_multiple(train_serial, model, tokenizer, Nx)
            parsed_data = deserialize_2d_integers(next_token, settings)
            # Check the rank first so a degenerate parse cannot raise on shape[1]
            if parsed_data.ndim == 2 and parsed_data.shape[1] == Nx:
                valid_prediction = True
                break
            print(f"Attempt {attempt+1} failed for Nx={Nx}, seed={seed}: Got shape {parsed_data.shape}, expected second dim to be {Nx}")
        if valid_prediction:
            unscaled_data = unscale_2d_array(parsed_data, vmin_exact, vmax_exact)
            unscaled_data_flat = unscaled_data.flatten()
            # Calculate LLM errors
            max_diff = np.max(np.abs(unscaled_data_flat - quantized_ground_truth))
            rmse = np.sqrt(np.mean((unscaled_data_flat - quantized_ground_truth)**2))
            prev_max_diff = max_diff
            prev_rmse = rmse
            seed_max_diffs_llm.append(max_diff)
            seed_rmses_llm.append(rmse)
        elif prev_max_diff is not None and prev_rmse is not None:
            # Fallback: reuse the most recent valid LLM errors for this Nx
            print(f"Failed to get valid prediction for Nx={Nx}, seed={seed} after {max_retries} attempts, use results from previous seed")
            seed_max_diffs_llm.append(prev_max_diff)
            seed_rmses_llm.append(prev_rmse)
        else:
            # No LLM sample is recorded for this seed. The finite-difference
            # baselines below do not depend on the LLM, so they are still computed
            # (previously a `continue` here dropped them for this seed as well).
            print(f"Failed to get valid prediction for Nx={Nx}, seed={seed} after {max_retries} attempts, and no previous results available. Skipping.")
        # Get the last three time steps for velocity estimation, each degraded by
        # the same scale/unscale round trip as the LLM input
        penultimate_exact = u_exact[Nt-1]
        antepenultimate_exact = u_exact[Nt-2]
        pre_ante_exact = u_exact[Nt-3]
        penultimate_2d = penultimate_exact[np.newaxis, :]
        scaled_pen_2d, _, _ = scale_2d_array(penultimate_2d, vmin_exact, vmax_exact)
        penultimate_degraded = unscale_2d_array(scaled_pen_2d, vmin_exact, vmax_exact)[0]
        antepenultimate_2d = antepenultimate_exact[np.newaxis, :]
        scaled_ante_2d, _, _ = scale_2d_array(antepenultimate_2d, vmin_exact, vmax_exact)
        antepenultimate_degraded = unscale_2d_array(scaled_ante_2d, vmin_exact, vmax_exact)[0]
        pre_ante_2d = pre_ante_exact[np.newaxis, :]
        scaled_pre_ante_2d, _, _ = scale_2d_array(pre_ante_2d, vmin_exact, vmax_exact)
        pre_ante_degraded = unscale_2d_array(scaled_pre_ante_2d, vmin_exact, vmax_exact)[0]
        # FD predictions
        predicted_leapfrog = one_step_leapfrog(antepenultimate_degraded, penultimate_degraded, c, dt, dx)
        initial_vel = estimate_velocity_bdf2(pre_ante_degraded, antepenultimate_degraded, penultimate_degraded, dt)
        # Keyword fixed: `init_disp` (was misspelled `nit_disp`), matching the
        # call in the temporal sweep.
        _, crank_nicolson_result, _ = solve_wave_crank_nicolson(L, c, dt, Nx, 1,
                                                                init_disp=penultimate_degraded,
                                                                init_vel=initial_vel)
        predicted_crank_nicolson = crank_nicolson_result[1]
        # Calculate FD errors
        max_diff_leapfrog = np.max(np.abs(predicted_leapfrog - quantized_ground_truth))
        rmse_leapfrog = np.sqrt(np.mean((predicted_leapfrog - quantized_ground_truth)**2))
        seed_max_diffs_leapfrog.append(max_diff_leapfrog)
        seed_rmses_leapfrog.append(rmse_leapfrog)
        max_diff_crank_nicolson = np.max(np.abs(predicted_crank_nicolson - quantized_ground_truth))
        rmse_crank_nicolson = np.sqrt(np.mean((predicted_crank_nicolson - quantized_ground_truth)**2))
        seed_max_diffs_crank_nicolson.append(max_diff_crank_nicolson)
        seed_rmses_crank_nicolson.append(rmse_crank_nicolson)
    # Aggregate over seeds; the sample std needs at least two LLM samples
    llm_final_max_diff.append(np.mean(seed_max_diffs_llm))
    llm_final_rmse.append(np.mean(seed_rmses_llm))
    llm_final_max_diff_std.append(np.std(seed_max_diffs_llm, ddof=1) if len(seed_max_diffs_llm) > 1 else 0)
    llm_final_rmse_std.append(np.std(seed_rmses_llm, ddof=1) if len(seed_rmses_llm) > 1 else 0)
    leapfrog_final_max_diff.append(np.mean(seed_max_diffs_leapfrog))
    leapfrog_final_rmse.append(np.mean(seed_rmses_leapfrog))
    crank_nicolson_final_max_diff.append(np.mean(seed_max_diffs_crank_nicolson))
    crank_nicolson_final_rmse.append(np.mean(seed_rmses_crank_nicolson))

# Compute baselines for spatial experiment using stored splines
# The baseline is the error introduced by the scale -> serialize -> deserialize ->
# unscale round trip alone, measured against the unquantized exact solution.
spatial_baseline_max_errors = []
spatial_baseline_rmse_errors = []
for Nx in all_Nx_values:
    seed_baseline_max_errors = []
    seed_baseline_rmse_errors = []
    for seed in range(n_seeds):
        # Use stored base initial condition and spline
        init_cond_random_base = stored_initial_conditions[seed]
        cs = stored_spline_objects[seed]
        # Sample at current Nx resolution
        # NOTE(review): init_cond_current is not used afterwards; the exact solution
        # below is computed from the stored spline `cs`.
        if Nx == Nx_base:
            init_cond_current = init_cond_random_base
        else:
            fig2, cs_same, init_cond_current = plot_both_grids(L, Nx_base, Nx, init_cond_random_base)
            plt.close(fig2)
        # Compute exact solution
        # (Nt is the fixed value set for the spatial sweep above)
        u_exact = compute_exact_solution_random_ic_vary_Nx(L, c, T, Nx, Nt, spline_obj=cs)
        # Quantization pipeline
        u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
        u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)
        u_exact_parsed = deserialize_2d_integers(u_exact_serialized, settings)
        u_exact_unscaled = unscale_2d_array(u_exact_parsed, vmin_exact, vmax_exact)
        # Compute baseline errors
        # (taken over the whole space-time array, not only the final step)
        baseline_max_error = np.max(np.abs(u_exact - u_exact_unscaled))
        baseline_rmse_error = np.sqrt(np.mean((u_exact - u_exact_unscaled)**2))
        seed_baseline_max_errors.append(baseline_max_error)
        seed_baseline_rmse_errors.append(baseline_rmse_error)
    # Average across seeds
    spatial_baseline_max_errors.append(np.mean(seed_baseline_max_errors))
    spatial_baseline_rmse_errors.append(np.mean(seed_baseline_rmse_errors))

epsilon=1e-4    # Avoid zero errors in log-log plots by enforcing a small minimum value
spatial_baseline_max_errors = np.array(spatial_baseline_max_errors)
spatial_baseline_rmse_errors = np.array(spatial_baseline_rmse_errors)
# Only the LLM means are floored at epsilon; the standard deviations are left as computed.
llm_final_max_diff = np.maximum(np.array(llm_final_max_diff), epsilon)
llm_final_rmse = np.maximum(np.array(llm_final_rmse), epsilon)
llm_final_max_diff_std = np.array(llm_final_max_diff_std)
llm_final_rmse_std = np.array(llm_final_rmse_std)
# Calculate log-scale confidence intervals
# Reuses t_critical from the temporal experiment (same n_seeds).
# NOTE(review): n_seeds is passed as the sample size even if the sweep recorded
# fewer LLM samples for some Nx because of skipped seeds.
lower_max_diff_log, upper_max_diff_log = log_ci(llm_final_max_diff, llm_final_max_diff_std, n_seeds, t_critical)
lower_rmse_log, upper_rmse_log = log_ci(llm_final_rmse, llm_final_rmse_std, n_seeds, t_critical)

In [None]:
# Persist every result of the spatial sweep in one compressed archive.
# NOTE(review): the finite-difference keys here carry an "_8B" suffix, while the
# temporal archive written earlier in this notebook stores the same metrics
# without it — confirm the spelling the analysis notebook expects.
np.savez_compressed(
    "8B_1_step_space_discretization.npz",
    # LLM metrics
    llm_final_max_diff_8B = llm_final_max_diff,
    llm_final_rmse_8B = llm_final_rmse,
    llm_final_max_diff_std_8B = llm_final_max_diff_std,
    llm_final_rmse_std_8B = llm_final_rmse_std,
    # Log-scale confidence intervals
    llm_lower_max_diff_log_8B = lower_max_diff_log,
    llm_upper_max_diff_log_8B = upper_max_diff_log,
    llm_lower_rmse_log_8B = lower_rmse_log,
    llm_upper_rmse_log_8B = upper_rmse_log,
    # Finite difference metrics
    leapfrog_final_max_diff_8B = leapfrog_final_max_diff,
    leapfrog_final_rmse_8B = leapfrog_final_rmse,
    crank_nicolson_final_max_diff_8B = crank_nicolson_final_max_diff,
    crank_nicolson_final_rmse_8B = crank_nicolson_final_rmse,
    # Baseline metrics
    spatial_baseline_max_errors = spatial_baseline_max_errors,
    spatial_baseline_rmse_errors = spatial_baseline_rmse_errors,
    # Experiment metadata: number of seeds, t critical value and the swept Nx values
    n_seeds = n_seeds,
    t_critical = t_critical,
    all_Nx_values = list(all_Nx_values)
)